comparison assign_taxonomy.xml @ 0:c1bd0c560018 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime commit bcbe76277f3e60303faf826f8ce7f018bc663a9a-dirty
author bebatut
date Tue, 02 Feb 2016 05:50:37 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c1bd0c560018
1 <tool id="qiime_assign_taxonomy" name="assign taxonomy" version="1.9.1galaxy1">
2
3 <description>Assign taxonomy to each sequence</description>
4
5 <macros>
6 <import>macros.xml</import>
7 </macros>
8
9 <expand macro="requirements" />
10
11 <command>
12 <![CDATA[
13 assign_taxonomy.py -i '$input_fasta_fp'
14 ## Optional reference datasets: each flag is emitted only when a dataset was supplied.
15 #if str($id_to_taxonomy_fp) != 'None':
16 -t '$id_to_taxonomy_fp'
17 #end if
18
19 #if str($reference_seqs_fp) != 'None':
20 -r '$reference_seqs_fp'
21 #end if
22 ## Fall back to uclust when no method value is present ('==' is the Cheetah equality test).
23 #if str($methodcond.assignment_method) == 'None':
24 -m uclust
25 #end if
26
27 #if str($methodcond.assignment_method) != 'None':
28 -m $methodcond.assignment_method
29 #end if
30 ## Method-specific options: only the branch matching the selected method adds flags.
31 #if $methodcond.assignment_method == "rtax":
32
33 #if $methodcond.single_ok:
34 --single_ok
35 #end if
36
37 #if $methodcond.no_single_ok_generic:
38 --no_single_ok_generic
39 #end if
40 ## Free-text values are single-quoted so the shell does not interpret characters such as parentheses.
41 #if str($methodcond.read_id_regex):
42 --read_id_regex='$methodcond.read_id_regex'
43 #end if
44
45 #if str($methodcond.amplicon_id_regex):
46 --amplicon_id_regex='$methodcond.amplicon_id_regex'
47 #end if
48
49 #if str($methodcond.header_id_regex):
50 --header_id_regex='$methodcond.header_id_regex'
51 #end if
52 #end if
53
54 #if $methodcond.assignment_method == "sortmerna":
55
56 #if str($methodcond.sortmerna_db):
57 --sortmerna_db='$methodcond.sortmerna_db'
58 #end if
59
60 #if $methodcond.sortmerna_e_value:
61 --sortmerna_e_value=$methodcond.sortmerna_e_value
62 #end if
63
64 #if $methodcond.sortmerna_coverage:
65 --sortmerna_coverage=$methodcond.sortmerna_coverage
66 #end if
67
68 #if $methodcond.sortmerna_best_N_alignments:
69 --sortmerna_best_N_alignments=$methodcond.sortmerna_best_N_alignments
70 #end if
71
72 #if str($methodcond.sortmerna_threads):
73 --sortmerna_threads='$methodcond.sortmerna_threads'
74 #end if
75
76 #if $methodcond.min_consensus_fraction:
77 --min_consensus_fraction=$methodcond.min_consensus_fraction
78 #end if
79
80 #if $methodcond.similarity:
81 --similarity=$methodcond.similarity
82 #end if
83 #end if
84
85 #if $methodcond.assignment_method == "blast":
86 ## Pass the BLAST database dataset chosen in the form (the blast_db parameter).
87 #if str($methodcond.blast_db) != 'None':
88 -b '$methodcond.blast_db'
89 #end if
90
91 #if $methodcond.blast_e_value:
92 -e $methodcond.blast_e_value
93 #end if
94 #end if
95
96 #if $methodcond.assignment_method == "rdp":
97
98 #if $methodcond.confidence:
99 -c $methodcond.confidence
100 #end if
101
102 #if $methodcond.rdp_max_memory:
103 --rdp_max_memory=$methodcond.rdp_max_memory
104 #end if
105 #end if
106
107 #if $methodcond.assignment_method == "mothur":
108
109 #if $methodcond.confidence:
110 -c $methodcond.confidence
111 #end if
112 #end if
113
114 #if $methodcond.assignment_method == "uclust":
115
116 #if $methodcond.min_consensus_fraction:
117 --min_consensus_fraction=$methodcond.min_consensus_fraction
118 #end if
119
120 #if $methodcond.similarity:
121 --similarity=$methodcond.similarity
122 #end if
123
124 #if $methodcond.uclust_max_accepts:
125 --uclust_max_accepts=$methodcond.uclust_max_accepts
126 #end if
127 #end if
128 -o assign_taxonomy_output
129 ]]>
130 </command>
131
132 <inputs> <!-- Form parameters for assign_taxonomy.py; defaults are declared with Galaxy's "value" and "checked" attributes. -->
133 <param label="-i/--input_fasta_fp: path to the input fasta file"
134 name="input_fasta_fp" optional="False" type="data"/>
135 <param
136 label="-t/--id_to_taxonomy_fp: Path to tab-delimited file mapping
137 sequences to assigned taxonomy. Each assigned taxonomy is provided as
138 a semicolon-separated list. For assignment with rdp, each assigned
139 taxonomy must be exactly 6 levels deep. [default: the path set in the QIIME configuration]"
140 name="id_to_taxonomy_fp" optional="True" type="data"/>
141 <param
142 label="-r/--reference_seqs_fp: Path to reference sequences. For
143 assignment with blast, these are used to generate a blast database.
144 For assignment with rdp, they are used as training sequences for the
145 classifier. [default: the path set in the QIIME configuration]"
146 name="reference_seqs_fp" optional="True" type="data"/>
147
148 <conditional name="methodcond">
149 <param label="-m/--assignment_method: Taxon assignment method, must be
150 one of rdp, blast, rtax, mothur, uclust, sortmerna [default: uclust]"
151 name="assignment_method" optional="False" type="select">
152 <option selected="True" value="uclust">uclust</option>
153 <option value="rdp">rdp</option>
154 <option value="blast">blast</option>
155 <option value="rtax">rtax</option>
156 <option value="mothur">mothur</option>
157 <option value="sortmerna">sortmerna</option>
158 </param>
159 <when value="rtax"> <!-- The regex fields start empty; the default each label quotes is then expected to apply (confirm against QIIME). -->
160 <param label="--single_ok: When classifying paired ends, allow
161 fallback to single-ended classification when the mate pair is
162 lacking (used for RTAX only). [default: False]" name="single_ok"
163 checked="false" type="boolean"/>
164 <param label="--no_single_ok_generic: When classifying paired ends,
165 do not allow fallback to single-ended classification when the
166 mate pair is overly generic (used for RTAX only). [default: False]"
167 name="no_single_ok_generic" checked="false" type="boolean"/>
168 <param label="--read_id_regex: Used to parse
169 the result of OTU clustering, to get the read_1_id for each
170 clusterID. The clusterID itself is assumed to be the first
171 field, and is not captured by the regex. (used for RTAX only).
172 [default: \S+\s+(\S+)]" name="read_id_regex" optional="True"
173 type="text"/>
174 <param label="--amplicon_id_regex: Used
175 to parse the result of split_libraries, to get the ampliconID
176 for each read_1_id. Two groups capture read_1_id and ampliconID,
177 respectively. (used for RTAX only). [default: (\S+)\s+(\S+?)\/]"
178 name="amplicon_id_regex" optional="True" type="text"/>
179 <param label="--header_id_regex: Used to
180 parse the result of split_libraries, to get the portion of the
181 header that RTAX uses to match mate pairs. The default uses
182 the amplicon ID, not including /1 or /3, as the primary key
183 for the query sequences. Typically this regex will be the
184 same as amplicon_id_regex, except that only the second group
185 is captured. (used for RTAX only). [default: \S+\s+(\S+?)\/]"
186 name="header_id_regex" optional="True" type="text"/>
187 </when>
188 <when value="sortmerna">
189 <param label="--sortmerna_db: Pre-existing database to search
190 against when using sortmerna [default: None]" name="sortmerna_db"
191 optional="True" type="text"/>
192 <param value="1.0" label="--sortmerna_e_value: Maximum E-value
193 when clustering [default = 1.0]" name="sortmerna_e_value"
194 optional="True" type="float"/>
195 <param value="0.9" label="--sortmerna_coverage: Minimum percent
196 query coverage (of an alignment) to consider a hit, expressed
197 as a fraction between 0 and 1 [default: 0.9]"
198 name="sortmerna_coverage" optional="True" type="float"/>
199 <param value="5" label="--sortmerna_best_N_alignments: This option
200 specifies how many best alignments per read will be written
201 [default: 5]" name="sortmerna_best_N_alignments" optional="True"
202 type="integer"/>
203 <param value="1" label="--sortmerna_threads: Specify number of
204 threads to be used for sortmerna mapper which utilizes multithreading.
205 [default: 1]" name="sortmerna_threads" optional="True"
206 type="integer"/>
207 <param value="0.51" label="--min_consensus_fraction: Minimum
208 fraction of database hits that must have a specific taxonomic
209 assignment to assign that taxonomy to a query, only used for
210 sortmerna and uclust methods [default: 0.51]"
211 name="min_consensus_fraction" optional="True" type="float"/>
212 <param value="0.9" label="--similarity: Minimum percent similarity
213 (expressed as a fraction between 0 and 1) to consider a database
214 match a hit, only used for sortmerna and uclust methods
215 [default: 0.9]" name="similarity" optional="True" type="float"/>
216 </when>
217 <when value="blast">
218 <param label="-b/--blast_db: Database to blast against. Must provide
219 either --blast_db or --reference_seqs_db for assignment with blast
220 [default: None]" name="blast_db" optional="True" type="data"/>
221 <param value="0.001" label="-e/--blast_e_value: Maximum e-value
222 to record an assignment, only used for blast method [default:
223 0.001]" name="blast_e_value" optional="True" type="float"/>
224 </when>
225 <when value="rdp">
226 <param value="0.5" label="-c/--confidence: Minimum confidence to
227 record an assignment, only used for rdp and mothur methods
228 [default: 0.5]" name="confidence" optional="True" type="float"/>
229 <param value="4000" label="--rdp_max_memory: Maximum memory
230 allocation, in MB, for Java virtual machine when using the
231 rdp method. Increase for large training sets [default: 4000]"
232 name="rdp_max_memory" optional="True" type="integer"/>
233 </when>
234 <when value="mothur">
235 <param value="0.5" label="-c/--confidence: Minimum confidence to
236 record an assignment, only used for rdp and mothur methods
237 [default: 0.5]" name="confidence" optional="True" type="float"/>
238 </when>
239 <when value="uclust">
240 <param value="0.51" label="--min_consensus_fraction: Minimum
241 fraction of database hits that must have a specific taxonomic
242 assignment to assign that taxonomy to a query, only used for
243 sortmerna and uclust methods [default: 0.51]" name="min_consensus_fraction"
244 optional="True" type="float"/>
245 <param value="0.9" label="--similarity: Minimum percent similarity
246 (expressed as a fraction between 0 and 1) to consider a database
247 match a hit, only used for sortmerna and uclust methods [default:
248 0.9]" name="similarity" optional="True" type="float"/>
249 <param value="3" label="--uclust_max_accepts: Number of database
250 hits to consider when making an assignment, only used for uclust
251 method [default: 3]" name="uclust_max_accepts" optional="True"
252 type="integer"/>
253 </when>
254 </conditional>
255 </inputs>
256
257 <outputs> <!-- Two text datasets, both located through paths under assign_taxonomy_output. -->
258 <data name="tax_assignements.log" label="tax_assignements.log"
259 format="txt" from_work_dir="assign_taxonomy_output/*.log"/>
260 <data name="tax_assignements.txt" label="tax_assignements.txt"
261 format="txt" from_work_dir="assign_taxonomy_output/*.txt"/>
262 </outputs>
263
264 <tests>
265 <test> <!-- Placeholder: this test declares no inputs and no expected outputs yet. -->
266 </test>
267 </tests>
268
269 <help><![CDATA[
270 **What it does**
271
272 Contains code for assigning taxonomy, using several techniques.
273
274 Given a set of sequences, this tool attempts to assign the taxonomy of each sequence.
275 Currently the methods implemented are assignment with BLAST, the RDP classifier,
276 RTAX, mothur, uclust, and sortmerna. The output of this step is an observation metadata
277 mapping file of input sequence identifiers (1st column of output file) to taxonomy
278 (2nd column) and quality score (3rd column). There may be method-specific information
279 in subsequent columns.
280
281 Reference data sets and id-to-taxonomy maps for 16S rRNA sequences can be found in
282 the Greengenes reference OTU builds. To get the latest build of the Greengenes OTUs
283 (and other marker gene OTU collections), follow the "Resources" link from http://qiime.org.
284 After downloading and unzipping you can use the following files as -r and -t, where
285 <otus_dir> is the name of the new directory after unzipping the reference OTUs tgz
286 file.
287
288 -r <otus_dir>/rep_set/97_otus.fasta
289 -t <otus_dir>/taxonomy/97_otu_taxonomy.txt
290
291 The consensus taxonomy assignment implemented here is the most detailed lineage
292 description shared by 90% or more of the sequences within the OTU (this level of
293 agreement can be adjusted by the user). The full lineage information for each
294 sequence is one of the output files of the analysis. In addition, a conflict file
295 records cases in which a phylum-level taxonomy assignment disagreement exists
296 within an OTU (such instances are rare and can reflect sequence misclassification
297 within the greengenes database).
298 ]]>
299 </help>
300
301 <citations>
302 <expand macro="citations" /> <!-- Expands the "citations" macro; presumably defined in the imported macros.xml (confirm). -->
303 </citations>
304 </tool>