annotate repex_full_clustering.xml @ 21:26cef01c9066 draft

Uploaded
author petrn
date Mon, 06 Jan 2020 10:27:41 +0000
parents 12318e81cbc5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
21
26cef01c9066 Uploaded
petrn
parents: 18
diff changeset
1 <tool id="repeatexplorer2x" name="RepeatExplorer2 clustering: " version="2.3.6" >
8
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
2 <stdio>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
3 <regex match="lastdb: can't open file: NEAR" source="stderr" level="fatal" description="Version of last is too old, use ver 956 or higher\n" />
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
4 <regex match="Traceback" source="stderr" level="fatal" description="Unknown error" />
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
5 <regex match="error" source="stderr" level="fatal" description="Unknown error" />
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
6 <regex match="Warning" source="stderr" level="warning" description="Warning reported on stderr" />
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
7 <exit_code range="1:" level="fatal" description="Error" />
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
8 </stdio>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
9 <description>Improved version of repeat discovery and characterization using graph based sequence clustering</description>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
10 <requirements>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
11 <requirement type="package" version="0.9.1" >pyrserve</requirement>
21
26cef01c9066 Uploaded
petrn
parents: 18
diff changeset
12 <requirement type="package" version="3.7.4">python</requirement>
18
12318e81cbc5 Uploaded
petrn
parents: 17
diff changeset
13 <requirement type="package">last</requirement>
8
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
14 <requirement type="package">mafft</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
15 <requirement type="package">imagemagick</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
16 <requirement type="package">blast</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
17 <requirement type="package">diamond</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
18 <requirement type="package">blast-legacy</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
19 <requirement type="package">r-igraph</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
20 <requirement type="package">r-data.tree</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
21 <requirement type="package">r-stringr</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
22 <requirement type="package">r-r2html</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
23 <requirement type="package">r-hwriter</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
24 <requirement type="package">r-dt</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
25 <requirement type="package">r-scales</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
26 <requirement type="package">r-plotrix</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
27 <requirement type="package">r-png</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
28 <requirement type="package">r-plyr</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
29 <requirement type="package">r-dplyr</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
30 <requirement type="package">r-optparse</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
31 <requirement type="package">r-dbi</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
32 <requirement type="package">r-rsqlite</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
33 <requirement type="package">r-rserve</requirement>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
34 <requirement type="package">bioconductor-biostrings</requirement>
17
7ba7048d9579 Uploaded
petrn
parents: 13
diff changeset
35 <requirement type="package" version="1.0">repex_tarean</requirement>
13
09c1934e0c45 Uploaded
petrn
parents: 8
diff changeset
36 <requirement type="set_environment">REPEX</requirement>
8
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
37 </requirements>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
38 <command >
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
39 export PYTHONHASHSEED=0;
13
09c1934e0c45 Uploaded
petrn
parents: 8
diff changeset
40 \${REPEX}/seqclust --sample ${sample} --output_dir=tarean_output --logfile=${log} --cleanup $paired --taxon $taxon
8
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
41
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
42 #if $advanced_options.advanced:
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
43 --mincl $advanced_options.size_threshold $advanced_options.keep_names $advanced_options.automatic_filtering -D $advanced_options.blastx.options_blastx
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
44 --assembly_min $advanced_options.assembly_min_cluster_size
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
45
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
46 #if $advanced_options.comparative.options_comparative:
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
47 --prefix_length $advanced_options.comparative.prefix_length
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
48 #end if
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
49
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
50 #if $advanced_options.custom_library.options_custom_library:
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
51 -d $advanced_options.custom_library.library extra_database
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
52 #end if
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
53
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
54 #if $advanced_options.options.options:
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
55 -opt $advanced_options.options.options
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
56 #end if
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
57 #end if
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
58 ${FastaFile} >stdout.log 2> stderr.log ;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
59 echo "STDOUT CONTENT:" >> ${log} ;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
60 cat stdout.log >> ${log} ;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
61 echo "STDERR CONTENT:" >> ${log};
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
62 cat stderr.log >> ${log} &amp;&amp;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
63 cd tarean_output &amp;&amp;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
64 zip -r ${ReportArchive}.zip * &amp;&amp;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
65 mv ${ReportArchive}.zip ${ReportArchive} &amp;&amp;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
66 cp index.html ${ReportFile} &amp;&amp;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
67 mkdir -p ${ReportFile.files_path} &amp;&amp;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
68 cp -r --parents libdir ${ReportFile.files_path} &amp;&amp;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
69 cp -r --parents seqclust/clustering/superclusters ${ReportFile.files_path} &amp;&amp;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
70 cp -r --parents seqclust/clustering/clusters ${ReportFile.files_path} &amp;&amp;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
71 cp seqclust/clustering/hitsort.cls ${ReportFile.files_path}/seqclust/clustering/hitsort.cls &amp;&amp;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
72 cp *.png ${ReportFile.files_path}/ &amp;&amp;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
73 cp *.csv ${ReportFile.files_path}/ &amp;&amp;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
74 cp *.html ${ReportFile.files_path}/ &amp;&amp;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
75 cp *.css ${ReportFile.files_path}/ &amp;&amp;
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
76 cp *.fasta ${ReportFile.files_path}/ 2>>$log &amp;&amp; rm -r ../tarean_output || :
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
77
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
78 </command>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
79 <inputs>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
80 <param name="FastaFile" label="NGS reads" type="data" format="fasta"
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
81 help="Input file must contain fasta-formatted NGS reads. If paired end reads are used, reads must be interlaced and all pairs must be complete. Example of input data format is provided in the help below. "/>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
82 <param name="paired" type="boolean" truevalue="--paired" falsevalue="" checked="True" label="Paired-end reads" help="Check if you are using paired-end reads and the input sequences contain both read mates, with left mates alternating with their right mates" />
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
83
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
84 <param name="sample" label="Sample size" type="integer" value="500000" min="10000"/>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
85 <param name="taxon" label="Select taxon and protein domain database version (REXdb)" type="select" help="Reference database of transposable element protein domains - REXdb - is used for annotation of repeats">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
86 <option value="VIRIDIPLANTAE3.0" selected="true">Viridiplantae version 3.0 </option>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
87 <option value="VIRIDIPLANTAE2.2" >Viridiplantae version 2.2</option>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
88 <option value="METAZOA3.0" >Metazoa version 3.0</option>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
89 <option value="METAZOA2.0" >Metazoa version 2.0</option>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
90 <!-- Modify setting in config.py accordingly -->
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
91 </param>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
92
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
93 <conditional name="advanced_options">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
94 <param name="advanced" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Advanced options" />
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
95 <when value="false">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
96 <!-- pass -->
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
97 </when>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
98 <when value="true">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
99 <conditional name="comparative">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
100 <param name="options_comparative" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Perform comparative analysis" help="Use this option when you want to compare sequences from multiple groups"/>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
101 <when value="false">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
102 <!-- do nothing here -->
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
103 </when>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
104 <when value="true">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
105 <param name="prefix_length" label="Group code length" type="integer" value="3" min="1" max="10" help="For comparative analysis, sequences are from individual groups distinguished by sample code which must be used as prefix for sequence name. See example below."/>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
106 </when>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
107 </conditional>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
108
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
109 <conditional name="blastx">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
110 <param name="options_blastx" type="select" label="Select parameters for protein domain search">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
111 <option value="BLASTX_W2" selected="false">blastx with word size 2 (the most sensitive, slowest)</option>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
112 <option value="BLASTX_W3" selected="true">blastx with word size 3 (default)</option>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
113 <option value="DIAMOND" selected="false">diamond program (the least sensitive, fastest)</option>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
114 </param>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
115 </conditional>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
116
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
117 <conditional name="options">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
118 <param name="options" type="select" label="Similarity search options" help="Different similarity search parameters are used depending on the used input data to adjust search to differences in length and error rate">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
119 <option value="ILLUMINA" selected="true">Illumina reads, read length 100nt or more </option>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
120 <option value="ILLUMINA_SHORT" selected="false">Illumina reads, shorter than 100nt (Do not use reads shorter than 50nt!) </option>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
121 <option value="ILLUMINA_DUST_OFF" selected="false">Illumina reads, no masking of low complexity repeats </option>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
122 <option value="OXFORD_NANOPORE" selected="false">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
123 Pseudo short reads simulated from Oxford Nanopore data (experimental feature)
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
124 </option>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
125 </param>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
126 </conditional>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
127
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
128 <conditional name="custom_library">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
129 <param name="options_custom_library" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Use custom repeat database"/>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
130 <when value="false">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
131 <!-- do nothing here -->
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
132 </when>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
133 <when value="true">
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
134 <param name="library" format="fasta" type="data" label="Custom library of repeats" help="Library of repeats as DNA sequences in fasta format. The required format for IDs in a custom library is : '>repeatname#class/subclass'"/>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
135 </when>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
136 </conditional>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
137 <param name="size_threshold" label="Cluster size threshold for detailed analysis" type="float" value="0.01" min="0.0001" max="100" help ="Minimal size (as percentage of input reads) of the smallest cluster which is analyzed; clusters with fewer than 20 reads are not considered at all."/>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
138 <param name="automatic_filtering" label="Perform automatic filtering of abundant satellite repeats" help="Automatic filtering tries to identify the most abundant tandem repeats and remove such sequences partially from analysis. Removal of abundant tandem repeat can enable to analyze higher proportion of other less abundant repeats." type="boolean" truevalue="--automatic_filtering" falsevalue="" checked="false"/>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
139 <param name="keep_names" label="Keep original sequences names" type="boolean" truevalue="--keep_names" falsevalue="" checked="false" help="By default sequence are relabeled using integers. If you want to keep original names, use this option."/>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
140 <param name="assembly_min_cluster_size" type="integer" label="min cluster size for assembly" value="5" min="2" max="100"/>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
141 </when>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
142 </conditional>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
143
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
144 </inputs>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
145 <outputs>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
146 <data name="log" format="txt" label="RepeatExplorer2 - log file"/>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
147 <data name="ReportArchive" format="zip" label="RepeatExplorer2 - Archive with HTML report from data ${FastaFile.hid}"/>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
148 <data name="ReportFile" format="html" label="RepeatExplorer2 - HTML report from data ${FastaFile.hid}"/>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
149 </outputs>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
150
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
151 <help>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
152 **HELP**
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
153
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
154 RepeatExplorer2 clustering is a computational pipeline for unsupervised
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
155 identification of repeats from unassembled sequence reads. The
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
156 pipeline uses low-pass whole genome sequence reads and performs graph-based
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
157 clustering. Resulting clusters, representing all types of repeats, are then
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
158 examined to identify repeats and classify them into groups.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
159
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
160 **Input data**
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
161
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
162 The analysis requires either **single** or **paired-end reads** generated
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
163 by whole genome shotgun sequencing provided as a single fasta-formatted file.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
164 Generally, paired-end reads provide significantly better results than single
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
165 reads. Reads should be of uniform length (optimal size range is 100-200 nt) and
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
166 the number of analyzed reads should represent less than 1x genome equivalent
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
167 (genome coverage of 0.01 - 0.50 x is recommended). Reads should be
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
168 quality-filtered (recommended filtering : quality score >=10 over 95% of bases
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
169 and no Ns allowed) and only **complete read pairs** should be submitted for
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
170 analysis. When paired reads are used, input data must be in **interlaced** format
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
171 as fasta file:
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
172
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
173 example of interlaced input format::
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
174
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
175 >0001_f
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
176 CGTAATATACATACTTGCTAGCTAGTTGGATGCATCCAACTTGCAAGCTAGTTTGATG
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
177 >0001_r
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
178 GATTTGACGGACACACTAACTAGCTAGTTGCATCTAAGCGGGCACACTAACTAACTAT
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
179 >0002_f
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
180 ACTCATTTGGACTTAACTTTGATAATAAAAACTTAAAAAGGTTTCTGCACATGAATCG
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
181 >0002_r
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
182 TATGTTGAAAAATTGAATTTCGGGACGAAACAGCGTCTATCGTCACGACATAGTGCTC
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
183 >0003_f
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
184 TGACATTTGTGAACGTTAATGTTCAACAAATCTTTCCAATGTCTTTTTATCTTATCAT
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
185 >0003_r
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
186 TATTGAAATACTGGACACAAATTGGAAATGAAACCTTGTGAGTTATTCAATTTATGTT
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
187 ...
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
188
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
189
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
190 **Comparative analysis**
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
191
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
192 For comparative analysis sequence names must contain code (prefix) for each group.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
193 Prefix in sequences names must be of fixed length.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
194
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
195 Example of labeling two groups where **group code length** is 2 and is used to distinguish groups - AA and BB ::
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
196
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
197 >AA0001_f
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
198 CGTAATATACATACTTGCTAGCTAGTTGGATGCATCCAACTTGCAAGCTAGTTTGATG
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
199 >AA0001_r
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
200 GATTTGACGGACACACTAACTAGCTAGTTGCATCTAAGCGGGCACACTAACTAACTAT
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
201 >AA0002_f
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
202 ACTCATTTGGACTTAACTTTGATAATAAAAACTTAAAAAGGTTTCTGCACATGAATCG
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
203 >AA0002_r
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
204 TATGTTGAAAAATTGAATTTCGGGACGAAACAGCGTCTATCGTCACGACATAGTGCTC
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
205 >BB0001_f
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
206 TGACATTTGTGAACGTTAATGTTCAACAAATCTTTCCAATGTCTTTTTATCTTATCAT
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
207 >BB0001_r
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
208 TATTGAAATACTGGACACAAATTGGAAATGAAACCTTGTGAGTTATTCAATTTATGTT
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
209 >BB0002_f
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
210 TGACATTTGTGAACGTTAATGTTCAACAAATCTTTCCAATGTCTTTTTATCTTATCAT
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
211 >BB0002_r
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
212 TATTGAAATACTGGACACAAATTGGAAATGAAACCTTGTGAGTTATTCAATTTATGTT
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
213
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
214
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
215 To prepare quality filtered and interlaced input fasta file from fastq
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
216 files, use `Preprocessing of paired-reads`__ tool.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
217
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
218 .. __: tool_runner?tool_id=paired_fastq_filtering
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
219
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
220
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
221 **Additional parameters**
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
222
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
223 **Sample size** defines how many reads should be used in calculation.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
224 Default setting with 500,000 reads will enable detection of high copy
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
225 repeats within several hours of computation time. For higher
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
226 sensitivity the sample size can be set higher. Since sample size affects
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
227 the memory usage, this parameter may be automatically adjusted to lower
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
228 value during the run. Maximum sample size which can be processed depends on
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
229 the repetitiveness of analyzed genome.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
230
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
231
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
232 **Select taxon and protein domain database version (REXdb)**. Classification
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
233 of transposable elements is based on the similarity to our reference database
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
234 of transposable element protein domains (**REXdb**). Standalone database for Viridiplantae species
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
235 can be obtained on `repeatexplorer.org`__. Classification
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
236 system used in REXdb is described in article `Systematic survey of plant
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
237 LTR-retrotransposons elucidates phylogenetic relationships of their
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
238 polyprotein domains and provides a reference for element classification`__.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
239 Database for Metazoa species is still under development so use it with caution.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
240
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
241 .. __: http://repeatexplorer.org
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
242 .. __: https://doi.org/10.1186/s13100-018-0144-1
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
243
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
244 **Select parameters for protein domain search** REXdb is compared with
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
245 sequence clusters either using blastx or diamond aligner. Diamond program
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
246 is about three times faster than blastx with word size 3.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
247
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
248 **Similarity search options** By default sequence reads are compared using
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
249 mgblast program. Default threshold is explicitly set to 90% sequence
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
250 similarity spanning at least 55% of the read length (in the case of reads
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
251 differing in length it applies to the longer one). Additionally, sequence
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
252 overlap must be at least 55 nt. If you select option for shorter reads
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
253 than 100 nt, minimum overlap 55 nt is not required.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
254
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
255 By default,
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
256 mgblast search uses the DUST program to filter out
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
257 low-complexity sequences. If you want
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
258 to increase sensitivity of detection of satellites with shorter monomer
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
259 use option with '*no masking of low complexity repeats*'. Note that omitting
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
260 DUST filtering will significantly increase running times.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
261
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
262
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
263 **Automatic filtering of abundant satellite repeats** performs clustering on
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
264 smaller dataset of sequence reads to detect abundant high confidence
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
265 satellite repeats. If such satellites are detected, sequence reads derived
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
266 from these satellites are depleted from input dataset. This step enables more
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
267 sensitive detection of less abundant repeats as more reads can be used
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
268 in clustering step.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
269
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
270 **Use custom repeat database**. This option allows users to perform similarity
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
271 comparison of identified repeats to their custom databases. The repeat class must
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
272 be encoded in FASTA headers of database entries in order to allow correct
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
273 parsing of similarity hits. Required format for custom database sequence name is: ::
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
274
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
275 >repeatname#class/subclass
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
276
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
277
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
278 **Output**
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
279
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
280 List of clusters identified as putative satellite repeats, their genomic
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
281 abundance and various cluster characteristics.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
282
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
283 Output includes a **HTML summary** with table listing of all analyzed
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
284 clusters. More detailed information about clusters is provided in
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
285 additional files and directories. All results are also provided as
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
286 downloadable **zip archive**. Additionally a **log file** reporting
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
287 the progress of the computational pipeline is provided.
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
288
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
289 </help>
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
290
3bc73f5dc785 Uploaded
petrn
parents:
diff changeset
291 </tool>