Mercurial > repos > bebatut > qiime
comparison pick_otus.xml @ 0:c1bd0c560018 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime commit bcbe76277f3e60303faf826f8ce7f018bc663a9a-dirty
author | bebatut |
---|---|
date | Tue, 02 Feb 2016 05:50:37 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c1bd0c560018 |
---|---|
1 <tool id="qiime_pick_otus" name="pick otus" version="1.9.1galaxy1"> | |
2 | |
3 <description>OTU picking</description> | |
4 | |
5 <macros> | |
6 <import>macros.xml</import> | |
7 </macros> | |
8 | |
9 <expand macro="requirements" /> | |
10 | |
11 <command> | |
12 <![CDATA[ | |
13 pick_otus.py | |
14 -i '$input_seqs_filepath' | |
15 -o fastasplit | |
16 ## Options below are added only when they apply to the selected OTU picking method. | |
17 #if str($methode.otu_picking_method) != 'None': | |
18 -m $methode.otu_picking_method | |
19 #end if | |
20 ## Reference-based methods: pass -r only when a dataset was supplied (never "-r None"), and honour -C. | |
21 #if str($methode.otu_picking_method) in ("uclust_ref","usearch_ref"): | |
22 #if str($methode.refseqs_fp) != 'None': | |
23 -r '$methode.refseqs_fp' | |
24 #end if | |
25 #if $methode.suppress_new_clusters: | |
26 -C | |
27 #end if | |
28 #end if | |
29 | |
30 #if str($methode.otu_picking_method) in ("uclust","uclust_ref","usearch","usearch_ref","sumaclust") and $methode.similarity: | |
31 -s $methode.similarity | |
32 #end if | |
33 | |
34 #if str($methode.otu_picking_method) == "sumaclust" and $methode.sumaclust_exact: | |
35 --sumaclust_exact | |
36 #end if | |
37 | |
38 #if str($methode.otu_picking_method) == "swarm" and $methode.swarm_resolution: | |
39 --swarm_resolution=$methode.swarm_resolution | |
40 #end if | |
41 | |
42 ## Options shared by the uclust and usearch method families. | |
43 #if str($methode.otu_picking_method) in ("uclust","uclust_ref","usearch","usearch_ref"): | |
44 #if $methode.enable_rev_strand_match: | |
45 -z | |
46 #end if | |
47 #if str($methode.max_accepts): | |
48 --max_accepts=$methode.max_accepts | |
49 #end if | |
50 | |
51 #if str($methode.max_rejects): | |
52 --max_rejects=$methode.max_rejects | |
53 #end if | |
54 #end if | |
55 | |
56 #if str($methode.otu_picking_method) in ("uclust","uclust_ref"): | |
57 #if $methode.stepwords: | |
58 --stepwords=$methode.stepwords | |
59 #end if | |
60 #if $methode.suppress_presort_by_abundance_uclust: | |
61 -D | |
62 #end if | |
63 #end if | |
64 | |
65 #if str($methode.otu_picking_method) == "uclust": | |
66 #if $methode.optimal_uclust: | |
67 -A | |
68 #end if | |
69 #if $methode.exact_uclust: | |
70 -E | |
71 #end if | |
72 #end if | |
73 ## NOTE(review): the usearch_fast_cluster input is not passed to pick_otus.py; confirm whether it should be. | |
74 #if str($methode.otu_picking_method) == "usearch": | |
75 #if $methode.percent_id_err: | |
76 -j $methode.percent_id_err | |
77 #end if | |
78 #if $methode.abundance_skew: | |
79 -a $methode.abundance_skew | |
80 #end if | |
81 #if str($methode.db_filepath) != 'None': | |
82 -f '$methode.db_filepath' | |
83 #end if | |
84 #if $methode.perc_id_blast: | |
85 --perc_id_blast=$methode.perc_id_blast | |
86 #end if | |
87 #if $methode.suppress_de_novo_chimera_detection: | |
88 -k | |
89 #end if | |
90 #end if | |
91 | |
92 #if str($methode.otu_picking_method) in ("sumaclust","swarm"): | |
93 #if str($methode.threads): | |
94 --threads=$methode.threads | |
95 #end if | |
96 #end if | |
97 | |
98 #if $prefix_prefilter_length: | |
99 -n $prefix_prefilter_length | |
100 #end if | |
101 | |
102 #if $prefix_length: | |
103 -p $prefix_length | |
104 #end if | |
105 | |
106 #if $suffix_length: | |
107 -u $suffix_length | |
108 #end if | |
109 | |
110 #if str($non_chimeras_retention): | |
111 -F $non_chimeras_retention | |
112 #end if | |
113 ]]> | |
114 </command> | |
115 | |
116 <inputs> | |
117 <param name="input_seqs_filepath" type="data" optional="False" | |
118 label="-i/--input_seqs_filepath: Path to input sequences file"/> | |
119 <conditional name="methode"> | |
120 <param name="otu_picking_method" type="select" optional="FALSE" | |
121 label="-m/--otu_picking_method: Method for picking OTUs. Valid | |
122 choices are: sortmerna, mothur, trie, uclust_ref, usearch, usearch_ref, | |
123 blast, usearch61, usearch61_ref, sumaclust, swarm, prefix_suffix, | |
124 cdhit, uclust. The mothur method requires an input file of aligned | |
125 sequences. usearch will enable the usearch quality filtering | |
126 pipeline. [default: uclust]"> | |
127 <option value="uclust" selected="True">uclust</option> | |
128 <option value="uclust_ref">uclust_ref</option> | |
129 <option value="usearch">usearch</option> | |
130 <option value="usearch_ref">usearch_ref</option> | |
131 <option value="sumaclust">sumaclust</option> | |
132 <option value="swarm">swarm</option> | |
133 </param> | |
134 <when value="uclust_ref"> <!-- Options for reference-based uclust; initial values use Galaxy's "value" and "checked" attributes. --> | |
135 <param name="refseqs_fp" type="data" optional="True" | |
136 label="-r/--refseqs_fp: Path to reference sequences to search | |
137 against when using -m uclust_ref, -m usearch_ref [default: | |
138 QIIME's configured reference set]"/> | |
139 <param value="0.97" label="-s/--similarity: Sequence similarity | |
140 threshold (for blast, cdhit, uclust, uclust_ref, usearch, | |
141 usearch_ref, usearch61, usearch61_ref, sumaclust or sortmerna | |
142 [default: 0.97]" name="similarity" optional="True" type="float"/> | |
143 <param label="-z/--enable_rev_strand_match: Enable reverse strand | |
144 matching for uclust, uclust_ref, usearch, usearch_ref, usearch61, | |
145 or usearch61_ref otu picking, will double the amount of memory | |
146 used. [default: False]" name="enable_rev_strand_match" | |
147 checked="false" type="boolean"/> | |
148 <param label="-D/--suppress_presort_by_abundance_uclust: Suppress | |
149 presorting of sequences by abundance when picking OTUs with | |
150 uclust or uclust_ref [default: False]" | |
151 name="suppress_presort_by_abundance_uclust" checked="false" | |
152 type="boolean"/> | |
153 <param label="-C/--suppress_new_clusters: Suppress creation of new | |
154 clusters using seqs that don't match reference when using -m | |
155 uclust_ref, -m usearch61_ref, or -m usearch_ref [default: False]" | |
156 name="suppress_new_clusters" checked="false" type="boolean"/> | |
157 <param value="default" label="--max_accepts: max_accepts value | |
158 to uclust, uclust_ref, usearch61, and usearch61_ref. By default, | |
159 will use value suggested by method (uclust: 1, usearch61: 1) | |
160 [default: default]" name="max_accepts" optional="True" type="text"/> | |
161 <param value="default" label="--max_rejects: max_rejects value | |
162 for uclust, uclust_ref, usearch61, and usearch61_ref. With | |
163 default settings, will use value recommended by clustering | |
164 method used (uclust: 8, usearch61: 8 for usearch_fast_cluster | |
165 option, 32 for reference and smallmem options) [default: | |
166 default]" name="max_rejects" optional="True" type="text"/> | |
167 <param value="8" label="--stepwords: stepwords value to uclust | |
168 and uclust_ref [default: 8]" name="stepwords" optional="True" | |
169 type="integer"/> | |
170 </when> | |
171 <when value="usearch_ref"> <!-- Options for reference-based usearch; initial values use Galaxy's "value" and "checked" attributes. --> | |
172 <param name="refseqs_fp" type="data" optional="True" | |
173 label="-r/--refseqs_fp: Path to reference sequences to search | |
174 against when using -m blast, -m sortmerna, -m uclust_ref, -m | |
175 usearch_ref, or -m usearch61_ref [default: | |
176 QIIME's configured reference set]"/> | |
177 <param value="0.97" label="-s/--similarity: Sequence similarity | |
178 threshold (for blast, cdhit, uclust, uclust_ref, usearch, | |
179 usearch_ref, usearch61, usearch61_ref, sumaclust or sortmerna | |
180 [default: 0.97]" name="similarity" optional="True" type="float"/> | |
181 <param label="-z/--enable_rev_strand_match: Enable reverse strand | |
182 matching for uclust, uclust_ref, usearch, usearch_ref, usearch61, | |
183 or usearch61_ref otu picking, will double the amount of memory | |
184 used. [default: False]" name="enable_rev_strand_match" checked="false" | |
185 type="boolean"/> | |
186 <param label="-C/--suppress_new_clusters: Suppress creation of new | |
187 clusters using seqs that don't match reference when using -m | |
188 uclust_ref, -m usearch61_ref, or -m usearch_ref [default: False]" | |
189 name="suppress_new_clusters" checked="false" type="boolean"/> | |
190 <param value="default" label="--max_accepts: max_accepts value | |
191 to uclust, uclust_ref, usearch61, and usearch61_ref. By default, | |
192 will use value suggested by method (uclust: 1, usearch61: 1) | |
193 [default: default]" name="max_accepts" optional="True" type="text"/> | |
194 <param value="default" label="--max_rejects: max_rejects value | |
195 for uclust, uclust_ref, usearch61, and usearch61_ref. With | |
196 default settings, will use value recommended by clustering method | |
197 used (uclust: 8, usearch61: 8 for usearch_fast_cluster option, | |
198 32 for reference and smallmem options) [default: default]" | |
199 name="max_rejects" optional="True" type="text"/> | |
200 </when> | |
201 <when value="usearch"> <!-- Options for de novo usearch; initial values use Galaxy's "value" and "checked" attributes. --> | |
202 <param value="0.97" label="-s/--similarity: Sequence | |
203 similarity threshold (for blast, cdhit, uclust, uclust_ref, | |
204 usearch, usearch_ref, usearch61, usearch61_ref, sumaclust or | |
205 sortmerna [default: 0.97]" name="similarity" optional="True" | |
206 type="float"/> | |
207 <param label="-z/--enable_rev_strand_match: Enable reverse strand | |
208 matching for uclust, uclust_ref, usearch, usearch_ref, usearch61, | |
209 or usearch61_ref otu picking, will double the amount of memory | |
210 used. [default: False]" name="enable_rev_strand_match" | |
211 checked="false" type="boolean"/> | |
212 <param value="default" label="--max_accepts: max_accepts value | |
213 to uclust, uclust_ref, usearch61, and usearch61_ref. By | |
214 default, will use value suggested by method (uclust: 1, | |
215 usearch61: 1) [default: default]" name="max_accepts" optional="True" | |
216 type="text"/> | |
217 <param value="default" label="--max_rejects: max_rejects value | |
218 for uclust, uclust_ref, usearch61, and usearch61_ref. With | |
219 default settings, will use value recommended by clustering | |
220 method used (uclust: 8, usearch61: 8 for usearch_fast_cluster | |
221 option, 32 for reference and smallmem options) [default: default]" | |
222 name="max_rejects" optional="True" type="text"/> | |
223 <param value="0.97" label="-j/--percent_id_err: | |
224 Percent identity threshold for cluster error detection with | |
225 usearch, expressed as a fraction between 0 and 1. [default: 0.97]" | |
226 name="percent_id_err" optional="True" type="float"/> | |
227 <param value="2.0" label="-a/--abundance_skew: Abundance skew | |
228 setting for de novo chimera detection with usearch. [default: 2.0]" | |
229 name="abundance_skew" optional="True" type="float"/> | |
230 <param label="-f/--db_filepath: Reference database | |
231 of fasta sequences for reference based chimera detection with | |
232 usearch. [default: None]" name="db_filepath" optional="True" | |
233 type="data"/> | |
234 <param value="0.97" label="--perc_id_blast: Percent | |
235 ID for mapping OTUs created by usearch back to original sequence | |
236 IDs [default: 0.97]" name="perc_id_blast" optional="True" | |
237 type="float"/> | |
238 <param label="-k/--suppress_de_novo_chimera_detection: Suppress | |
239 de novo chimera detection in usearch. [default: False]" | |
240 name="suppress_de_novo_chimera_detection" checked="false" | |
241 type="boolean"/> | |
242 <param label="--usearch_fast_cluster: Use fast clustering option | |
243 for usearch or usearch61_ref with new clusters. | |
244 --enable_rev_strand_match can not be enabled with this option, | |
245 and the only valid option for usearch61_sort_method is 'length'. | |
246 This option uses more memory than the default option for de novo | |
247 clustering. [default: False]" name="usearch_fast_cluster" | |
248 checked="false" type="boolean"/> | |
249 </when> | |
250 <when value="sumaclust"> <!-- Options for sumaclust; initial values use Galaxy's "value" and "checked" attributes. --> | |
251 <param value="0.97" label="-s/--similarity: Sequence | |
252 similarity threshold (for blast, cdhit, uclust, uclust_ref, | |
253 usearch, usearch_ref, usearch61, usearch61_ref, sumaclust or | |
254 sortmerna [default: 0.97]" name="similarity" optional="True" | |
255 type="float"/> | |
256 <param label="--sumaclust_exact: A sequence is assigned to the best | |
257 matching seed rather than the first matching seed passing the | |
258 similarity threshold [default: False]" name="sumaclust_exact" | |
259 checked="false" type="boolean"/> | |
260 <param value="1" label="--threads: Specify number of threads (1 | |
261 thread per core) to be used for usearch61, sortmerna, sumaclust and swarm commands that utilize multithreading. [default: 1]" name="threads" optional="True" type="text"/> | |
262 </when> | |
263 <when value="swarm"> <!-- Options for swarm; initial values use Galaxy's "value" attribute. --> | |
264 <param value="1" label="--swarm_resolution: Maximum number of | |
265 differences allowed between two amplicons, meaning that two | |
266 amplicons will be grouped if they have integer (or less) | |
267 differences (see Swarm manual at https://github.com/torognes/swarm | |
268 for more details). [default: 1]" name="swarm_resolution" | |
269 optional="True" type="integer"/> | |
270 <param value="1" label="--threads: Specify number of threads (1 | |
271 thread per core) to be used for usearch61, sortmerna, sumaclust | |
272 and swarm commands that utilize multithreading. [default: 1]" | |
273 name="threads" optional="True" type="text"/> | |
274 </when> | |
275 <when value="uclust"> <!-- Options for de novo uclust; initial values use Galaxy's "value" and "checked" attributes. --> | |
276 <param value="0.97" label="-s/--similarity: Sequence | |
277 similarity threshold (for blast, cdhit, uclust, uclust_ref, | |
278 usearch, usearch_ref, usearch61, usearch61_ref, sumaclust or | |
279 sortmerna [default: 0.97]" name="similarity" optional="True" | |
280 type="float"/> | |
281 <param label="-z/--enable_rev_strand_match: Enable reverse strand | |
282 matching for uclust, uclust_ref, usearch, usearch_ref, usearch61, | |
283 or usearch61_ref otu picking, will double the amount of memory | |
284 used. [default: False]" name="enable_rev_strand_match" checked="false" | |
285 type="boolean"/> | |
286 <param label="-A/--optimal_uclust: Pass the --optimal flag to uclust | |
287 for uclust otu picking. [default: False]" name="optimal_uclust" | |
288 checked="false" type="boolean"/> | |
289 <param label="-D/--suppress_presort_by_abundance_uclust: Suppress | |
290 presorting of sequences by abundance when picking OTUs with | |
291 uclust or uclust_ref [default: False]" | |
292 name="suppress_presort_by_abundance_uclust" checked="false" | |
293 type="boolean"/> | |
294 <param value="default" label="--max_accepts: max_accepts value | |
295 to uclust, uclust_ref, usearch61, and usearch61_ref. By default, | |
296 will use value suggested by method (uclust: 1, usearch61: 1) | |
297 [default: default]" name="max_accepts" optional="True" | |
298 type="text"/> | |
299 <param value="default" label="--max_rejects: max_rejects value | |
300 for uclust, uclust_ref, usearch61, and usearch61_ref. With | |
301 default settings, will use value recommended by clustering | |
302 method used (uclust: 8, usearch61: 8 for usearch_fast_cluster | |
303 option, 32 for reference and smallmem options) [default: default]" | |
304 name="max_rejects" optional="True" type="text"/> | |
305 <param value="8" label="--stepwords: stepwords value to uclust | |
306 and uclust_ref [default: 8]" name="stepwords" optional="True" | |
307 type="integer"/> | |
308 <param label="-E/--exact_uclust: Pass the --exact flag to uclust | |
309 for uclust otu picking. [default: False]" name="exact_uclust" | |
310 checked="false" type="boolean"/> | |
311 </when> | |
312 </conditional> | |
313 | |
314 <param label="-n/--prefix_prefilter_length: Prefilter data | |
315 so seqs with identical first prefix_prefilter_length are automatically | |
316 grouped into a single OTU. This is useful for large sequence collections | |
317 where OTU picking doesn't scale well [default: None; 100 is a good value]" | |
318 name="prefix_prefilter_length" optional="True" type="integer"/> <!-- no initial value: -n is passed only when the user sets one --> | |
319 <param value="50" label="-p/--prefix_length: Prefix length when using | |
320 the prefix_suffix otu picker; WARNING: CURRENTLY DIFFERENT FROM | |
321 prefix_prefilter_length (-n)! [default: 50]" name="prefix_length" | |
322 optional="True" type="integer"/> | |
323 <param value="50" label="-u/--suffix_length: Suffix length when using | |
324 the prefix_suffix otu picker [default: 50]" name="suffix_length" | |
325 optional="True" type="integer"/> | |
326 <param value="union" label="-F/--non_chimeras_retention: Selects subsets | |
327 of sequences detected as non-chimeras to retain after de novo and | |
328 reference based chimera detection. Options are intersection or union. | |
329 union will retain sequences that are flagged as non-chimeric from either | |
330 filter, while intersection will retain only those sequences that are | |
331 flagged as non-chimeras from both detection methods. [default: union]" | |
332 name="non_chimeras_retention" optional="True" type="text"/> | |
333 </inputs> | |
334 <outputs> <!-- Three text datasets collected from the "fastasplit" directory that the command passes to pick_otus.py via -o. NOTE(review): the from_work_dir values contain "*" wildcards and the output names contain dots; confirm Galaxy resolves both as intended. --> | |
335 <data format="txt" from_work_dir="fastasplit/*_otus.txt" | |
336 name="pick_otus.txt" label="pick_otus.txt"/> | |
337 <data format="txt" from_work_dir="fastasplit/*_otus.log" | |
338 name="pick_otus.log" label="pick_otus.log"/> | |
339 <data format="txt" from_work_dir="fastasplit/*_failures.txt" | |
340 name="pick_otus_failures.txt" label="pick_otus_failures.txt"/> | |
341 </outputs> | |
342 | |
343 <tests> <!-- NOTE(review): the single test below is an empty placeholder with no inputs and no expected outputs; replace it with a real test case. --> | |
344 <test> | |
345 </test> | |
346 </tests> | |
347 | |
348 <help><![CDATA[ | |
349 **What it does** | |
350 | |
351 The OTU picking step assigns similar sequences to operational taxonomic units, or OTUs, by clustering sequences based on a user-defined similarity threshold. Sequences which are similar at or above the threshold level are taken to represent the presence of a taxonomic unit (e.g., a genus, when the similarity threshold is set at 0.94) in the sequence collection. | |
352 | |
353 Currently, the following clustering methods have been implemented in QIIME: | |
354 | |
355 1. uclust, creates "seeds" of sequences which generate clusters based on percent identity. | |
356 | |
357 2. uclust_ref, as uclust, but takes a reference database to use as seeds. New clusters can be toggled on or off. | |
358 | |
359 3. usearch, creates "seeds" of sequences which generate clusters based on percent identity, filters low abundance clusters, performs de novo and reference based chimera detection. | |
360 | |
361 4. usearch_ref, as usearch, but takes a reference database to use as seeds. New clusters can be toggled on or off. | |
362 | |
363 5. sumaclust, creates "seeds" of sequences which generate clusters based on similarity threshold. | |
364 | |
365 6. swarm, creates "seeds" of sequences which generate clusters based on a resolution threshold. | |
366 | |
367 | |
368 Chimera checking with usearch 6.X is implemented in identify_chimeric_seqs.py. Chimera checking should be done first with usearch 6.X, and the filtered resulting fasta file can then be clustered. | |
369 | |
370 | |
371 The primary inputs for pick_otus.py are: | |
372 | |
373 1. A FASTA file containing sequences to be clustered | |
374 | |
375 2. An OTU threshold (default is 0.97, roughly corresponding to species-level OTUs); | |
376 | |
377 3. The method to be applied for clustering sequences into OTUs. | |
378 | |
379 pick_otus.py takes a standard fasta file as input. | |
380 | |
381 | |
382 The output consists of two files (i.e. seqs_otus.txt and seqs_otus.log). The .txt file is composed of tab-delimited lines, where the first field on each line corresponds to an (arbitrary) cluster identifier, and the remaining fields correspond to sequence identifiers assigned to that cluster. Sequence identifiers correspond to those provided in the input FASTA file. Usearch (i.e. usearch quality filter) can additionally have log files for each intermediate call to usearch. | |
383 | |
384 Example lines from the resulting .txt file: | |
385 | |
386 = ==== ==== ==== | |
387 0 seq1 seq5 | |
388 1 seq2 | |
389 2 seq3 | |
390 3 seq4 seq6 seq7 | |
391 = ==== ==== ==== | |
392 | |
393 This result implies that four clusters were created based on 7 input sequences. | |
394 The first cluster (cluster id 0) contains two sequences, sequence ids seq1 and | |
395 seq5; the second cluster (cluster id 1) contains one sequence, sequence id seq2; | |
396 the third cluster (cluster id 2) contains one sequence, sequence id seq3, and the | |
397 final cluster (cluster id 3) contains three sequences, sequence ids seq4, seq6, | |
398 and seq7. | |
399 | |
400 The resulting .log file contains a list of parameters passed to the pick_otus.py | |
401 script along with the output location of the resulting .txt file. | |
402 ]]> | |
403 </help> | |
404 | |
405 <citations> <!-- Expands the "citations" macro (presumably defined in the imported macros.xml) and then lists four further DOIs. --> | |
406 <expand macro="citations" /> | |
407 <citation type="doi">10.1093/bioinformatics/btv231</citation> | |
408 <citation type="doi">10.1093/bioinformatics/btq461</citation> | |
409 <citation type="doi">10.1093/bioinformatics/bts611</citation> | |
410 <citation type="doi">10.7287/peerj.preprints.386v1/supp-1</citation> | |
411 </citations> | |
412 </tool> |