Mercurial > repos > bebatut > qiime
comparison assign_taxonomy.xml @ 0:c1bd0c560018 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime commit bcbe76277f3e60303faf826f8ce7f018bc663a9a-dirty
author | bebatut |
---|---|
date | Tue, 02 Feb 2016 05:50:37 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c1bd0c560018 |
---|---|
1 <tool id="qiime_assign_taxonomy" name="assign taxonomy" version="1.9.1galaxy1"> | |
2 | |
3 <description>Assign taxonomy to each sequence</description> | |
4 | |
5 <macros> | |
6 <import>macros.xml</import> | |
7 </macros> | |
8 | |
9 <expand macro="requirements" /> | |
10 | |
11 <command> | |
12 <![CDATA[ | |
13 assign_taxonomy.py -i $input_fasta_fp | |
14 ## Builds the assign_taxonomy.py call; -t and -r are emitted only when their parameter does not stringify to 'None'. | |
15 #if str($id_to_taxonomy_fp) != 'None': | |
16 -t $id_to_taxonomy_fp | |
17 #end if | |
18 | |
19 #if str($reference_seqs_fp) != 'None': | |
20 -r $reference_seqs_fp | |
21 #end if | |
22 ## Method flag: fall back to uclust when the select stringifies to 'None', otherwise pass the chosen method through. | |
23 #if str($methodcond.assignment_method) == 'None': | |
24 -m uclust | |
25 #end if | |
26 | |
27 #if str($methodcond.assignment_method) != 'None': | |
28 -m $methodcond.assignment_method | |
29 #end if | |
30 ## Method-specific options: only the branch matching the selected method contributes flags. | |
31 #if $methodcond.assignment_method == "rtax": | |
32 | |
33 #if $methodcond.single_ok: | |
34 --single_ok | |
35 #end if | |
36 | |
37 #if $methodcond.no_single_ok_generic: | |
38 --no_single_ok_generic | |
39 #end if | |
40 | |
41 #if str($methodcond.read_id_regex): | |
42 --read_id_regex=$methodcond.read_id_regex | |
43 #end if | |
44 | |
45 #if str($methodcond.amplicon_id_regex): | |
46 --amplicon_id_regex=$methodcond.amplicon_id_regex | |
47 #end if | |
48 | |
49 #if str($methodcond.header_id_regex): | |
50 --header_id_regex=$methodcond.header_id_regex | |
51 #end if | |
52 #end if | |
53 | |
54 #if $methodcond.assignment_method == "sortmerna": | |
55 | |
56 #if str($methodcond.sortmerna_db): | |
57 --sortmerna_db=$methodcond.sortmerna_db | |
58 #end if | |
59 | |
60 #if $methodcond.sortmerna_e_value: | |
61 --sortmerna_e_value=$methodcond.sortmerna_e_value | |
62 #end if | |
63 | |
64 #if $methodcond.sortmerna_coverage: | |
65 --sortmerna_coverage=$methodcond.sortmerna_coverage | |
66 #end if | |
67 | |
68 #if $methodcond.sortmerna_best_N_alignments: | |
69 --sortmerna_best_N_alignments=$methodcond.sortmerna_best_N_alignments | |
70 #end if | |
71 | |
72 #if str($methodcond.sortmerna_threads): | |
73 --sortmerna_threads=$methodcond.sortmerna_threads | |
74 #end if | |
75 | |
76 #if $methodcond.min_consensus_fraction: | |
77 --min_consensus_fraction=$methodcond.min_consensus_fraction | |
78 #end if | |
79 | |
80 #if $methodcond.similarity: | |
81 --similarity=$methodcond.similarity | |
82 #end if | |
83 #end if | |
84 | |
85 #if $methodcond.assignment_method == "blast": | |
86 ## NOTE(review): -b receives the escaped shell variable BLAST_DB_NAME rather than the selected blast_db dataset; confirm where that variable is set. | |
87 #if str($methodcond.blast_db) != 'None': | |
88 -b \$BLAST_DB_NAME | |
89 #end if | |
90 | |
91 #if $methodcond.blast_e_value: | |
92 -e $methodcond.blast_e_value | |
93 #end if | |
94 #end if | |
95 | |
96 #if $methodcond.assignment_method == "rdp": | |
97 | |
98 #if $methodcond.confidence: | |
99 -c $methodcond.confidence | |
100 #end if | |
101 | |
102 #if $methodcond.rdp_max_memory: | |
103 --rdp_max_memory=$methodcond.rdp_max_memory | |
104 #end if | |
105 #end if | |
106 | |
107 #if $methodcond.assignment_method == "mothur": | |
108 | |
109 #if $methodcond.confidence: | |
110 -c $methodcond.confidence | |
111 #end if | |
112 #end if | |
113 | |
114 #if $methodcond.assignment_method == "uclust": | |
115 | |
116 #if $methodcond.min_consensus_fraction: | |
117 --min_consensus_fraction=$methodcond.min_consensus_fraction | |
118 #end if | |
119 | |
120 #if $methodcond.similarity: | |
121 --similarity=$methodcond.similarity | |
122 #end if | |
123 | |
124 #if $methodcond.uclust_max_accepts: | |
125 --uclust_max_accepts=$methodcond.uclust_max_accepts | |
126 #end if | |
127 #end if | |
128 -o assign_taxonomy_output | |
129 ]]> | |
130 </command> | |
131 | |
132 <inputs> | |
133 <param label="-i/--input_fasta_fp: path to the input fasta file" | |
134 name="input_fasta_fp" optional="False" type="data"/> | |
135 <param | |
136 label="-t/--id_to_taxonomy_fp: Path to tab-delimited file mapping | |
137 sequences to assigned taxonomy. Each assigned taxonomy is provided as | |
138 a semicolon-separated list. For assignment with rdp, each assigned | |
139 taxonomy must be exactly 6 levels deep. [default: /home12/caparmor/bioinfo/softs/sources/Qiime/data/gg_13_8_otus/taxonomy/99_otu_taxonomy.txt]" | |
140 name="id_to_taxonomy_fp" optional="True" type="data"/> | |
141 <param | |
142 label="-r/--reference_seqs_fp: Path to reference sequences. For | |
143 assignment with blast, these are used to generate a blast database. | |
144 For assignment with rdp, they are used as training sequences for the | |
145 classifier. [default: /home12/caparmor/bioinfo/softs/sources/Qiime/data/gg_13_8_otus/rep_set/99_otus.fasta]" | |
146 name="reference_seqs_fp" optional="True" type="data"/> | |
147 <!-- Numeric defaults use the "value" attribute and booleans use "checked"; data and regex params carry no preset, so their flags are only emitted when the user supplies them. --> | |
148 <conditional name="methodcond"> | |
149 <param label="-m/--assignment_method: Taxon assignment method, must be | |
150 one of rdp, blast, rtax, mothur, uclust, sortmerna [default: uclust]" | |
151 name="assignment_method" optional="False" type="select"> | |
152 <option selected="True" value="uclust">uclust</option> | |
153 <option value="rdp">rdp</option> | |
154 <option value="blast">blast</option> | |
155 <option value="rtax">rtax</option> | |
156 <option value="mothur">mothur</option> | |
157 <option value="sortmerna">sortmerna</option> | |
158 </param> | |
159 <when value="rtax"> | |
160 <param label="--single_ok: When classifying paired ends, allow | |
161 fallback to single-ended classification when the mate pair is | |
162 lacking (used for RTAX only). [default: False]" name="single_ok" | |
163 checked="False" type="boolean"/> | |
164 <param label="--no_single_ok_generic: When classifying paired ends, | |
165 do not allow fallback to single-ended classification when the | |
166 mate pair is overly generic (used for RTAX only). [default: False]" | |
167 name="no_single_ok_generic" checked="False" type="boolean"/> | |
168 <param label="--read_id_regex: Used to parse | |
169 the result of OTU clustering, to get the read_1_id for each | |
170 clusterID. The clusterID itself is assumed to be the first | |
171 field, and is not captured by the regex. (used for RTAX only). | |
172 [default: \S+\s+(\S+)]" name="read_id_regex" optional="True" | |
173 type="text"/> | |
174 <param label="--amplicon_id_regex: Used | |
175 to parse the result of split_libraries, to get the ampliconID | |
176 for each read_1_id. Two groups capture read_1_id and ampliconID, | |
177 respectively. (used for RTAX only). [default: (\S+)\s+(\S+?)\/]" | |
178 name="amplicon_id_regex" optional="True" type="text"/> | |
179 <param label="--header_id_regex: Used to | |
180 parse the result of split_libraries, to get the portion of the | |
181 header that RTAX uses to match mate pairs. The default uses | |
182 the amplicon ID, not including /1 or /3, as the primary key | |
183 for the query sequences. Typically this regex will be the | |
184 same as amplicon_id_regex, except that only the second group | |
185 is captured. (used for RTAX only). [default: \S+\s+(\S+?)\/]" | |
186 name="header_id_regex" optional="True" type="text"/> | |
187 </when> | |
188 <when value="sortmerna"> | |
189 <param label="--sortmerna_db: Pre-existing database to search | |
190 against when using sortmerna [default: None]" name="sortmerna_db" | |
191 optional="True" type="text"/> | |
192 <param value="1.0" label="--sortmerna_e_value: Maximum E-value | |
193 when clustering [default = 1.0]" name="sortmerna_e_value" | |
194 optional="True" type="float"/> | |
195 <param value="0.9" label="--sortmerna_coverage: Mininum percent | |
196 query coverage (of an alignment) to consider a hit, expressed | |
197 as a fraction between 0 and 1 [default: 0.9]" | |
198 name="sortmerna_coverage" optional="True" type="float"/> | |
199 <param value="5" label="--sortmerna_best_N_alignments: This option | |
200 specifies how many best alignments per read will be written | |
201 [default: 5]" name="sortmerna_best_N_alignments" optional="True" | |
202 type="integer"/> | |
203 <param value="1" label="--sortmerna_threads: Specify number of | |
204 threads to be used for sortmerna mapper which utilizes multithreading. | |
205 [default: 1]" name="sortmerna_threads" optional="True" | |
206 type="text"/> | |
207 <param value="0.51" label="--min_consensus_fraction: Minimum | |
208 fraction of database hits that must have a specific taxonomic | |
209 assignment to assign that taxonomy to a query, only used for | |
210 sortmerna and uclust methods [default: 0.51]" | |
211 name="min_consensus_fraction" optional="True" type="float"/> | |
212 <param value="0.9" label="--similarity: Minimum percent similarity | |
213 (expressed as a fraction between 0 and 1) to consider a database | |
214 match a hit, only used for sortmerna and uclust methods | |
215 [default: 0.9]" name="similarity" optional="True" type="float"/> | |
216 </when> | |
217 <when value="blast"> | |
218 <param label="-b/--blast_db: Database to blast against. Must provide | |
219 either --blast_db or --reference_seqs_db for assignment with blast | |
220 [default: None]" name="blast_db" optional="True" type="data"/> | |
221 <param value="0.001" label="-e/--blast_e_value: Maximum e-value | |
222 to record an assignment, only used for blast method [default: | |
223 0.001]" name="blast_e_value" optional="True" type="float"/> | |
224 </when> | |
225 <when value="rdp"> | |
226 <param value="0.5" label="-c/--confidence: Minimum confidence to | |
227 record an assignment, only used for rdp and mothur methods | |
228 [default: 0.5]" name="confidence" optional="True" type="float"/> | |
229 <param value="4000" label="--rdp_max_memory: Maximum memory | |
230 allocation, in MB, for Java virtual machine when using the | |
231 rdp method. Increase for large training sets [default: 4000]" | |
232 name="rdp_max_memory" optional="True" type="integer"/> | |
233 </when> | |
234 <when value="mothur"> | |
235 <param value="0.5" label="-c/--confidence: Minimum confidence to | |
236 record an assignment, only used for rdp and mothur methods | |
237 [default: 0.5]" name="confidence" optional="True" type="float"/> | |
238 </when> | |
239 <when value="uclust"> | |
240 <param value="0.51" label="--min_consensus_fraction: Minimum | |
241 fraction of database hits that must have a specific taxonomic | |
242 assignment to assign that taxonomy to a query, only used for | |
243 sortmerna and uclust methods [default: 0.51]" name="min_consensus_fraction" | |
244 optional="True" type="float"/> | |
245 <param value="0.9" label="--similarity: Minimum percent similarity | |
246 (expressed as a fraction between 0 and 1) to consider a database | |
247 match a hit, only used for sortmerna and uclust methods [default: | |
248 0.9]" name="similarity" optional="True" type="float"/> | |
249 <param value="3" label="--uclust_max_accepts: Number of database | |
250 hits to consider when making an assignment, only used for uclust | |
251 method [default: 3]" name="uclust_max_accepts" optional="True" | |
252 type="integer"/> | |
253 </when> | |
254 </conditional> | |
255 </inputs> | |
256 | |
257 <outputs> <!-- Two text outputs collected from the assign_taxonomy_output directory. NOTE(review): both from_work_dir values contain a * wildcard; confirm Galaxy resolves wildcards there, since the attribute is usually a literal path. --> | |
258 <data format="txt" from_work_dir="assign_taxonomy_output/*.log" | |
259 label="tax_assignements.log" name="tax_assignements.log"/> | |
260 <data format="txt" from_work_dir="assign_taxonomy_output/*.txt" | |
261 label="tax_assignements.txt" name="tax_assignements.txt"/> | |
262 </outputs> | |
263 | |
264 <tests> <!-- TODO: the single test case below is empty (no params, no expected outputs); add a real case before relying on it. --> | |
265 <test> | |
266 </test> | |
267 </tests> | |
268 | |
269 <help><![CDATA[ | |
270 **What it does** | |
271 | |
272 Contains code for assigning taxonomy, using several techniques. | |
273 | |
274 Given a set of sequences, assign_taxonomy.py attempts to assign the taxonomy of each sequence. | |
275 Currently the methods implemented are assignment with BLAST, the RDP classifier, | |
276 RTAX, mothur, uclust, and sortmerna. The output of this step is an observation metadata | |
277 mapping file of input sequence identifiers (1st column of output file) to taxonomy | |
278 (2nd column) and quality score (3rd column). There may be method-specific information | |
279 in subsequent columns. | |
280 | |
281 Reference data sets and id-to-taxonomy maps for 16S rRNA sequences can be found in | |
282 the Greengenes reference OTU builds. To get the latest build of the Greengenes OTUs | |
283 (and other marker gene OTU collections), follow the "Resources" link from http://qiime.org. | |
284 After downloading and unzipping you can use the following files as -r and -t, where | |
285 <otus_dir> is the name of the new directory after unzipping the reference OTUs tgz | |
286 file. | |
287 | |
288 -r <otus_dir>/rep_set/97_otus.fasta | |
289 -t <otus_dir>/taxonomy/97_otu_taxonomy.txt | |
290 | |
291 The consensus taxonomy assignment implemented here is the most detailed lineage | |
292 description shared by 90% or more of the sequences within the OTU (this level of | |
293 agreement can be adjusted by the user). The full lineage information for each | |
294 sequence is one of the output files of the analysis. In addition, a conflict file | |
295 records cases in which a phylum-level taxonomy assignment disagreement exists | |
296 within an OTU (such instances are rare and can reflect sequence misclassification | |
297 within the greengenes database). | |
298 ]]> | |
299 </help> | |
300 | |
301 <citations> | |
302 <expand macro="citations" /> | |
303 </citations> | |
304 </tool> |