comparison mapseq.xml @ 0:16f561c480bb draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mapseq commit 3652500c9a0b6d92f6dc254cea7dcfcc6522d842
author iuc
date Mon, 14 Oct 2024 12:27:57 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:16f561c480bb
1 <tool id="mapseq" name="MAPseq" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
2 <description>sequence read classification designed to assign taxonomy and OTU classifications</description>
3 <macros> <!-- version tokens: both feed the tool's version attribute; the first also pins the mapseq package -->
4 <token name="@TOOL_VERSION@">2.1.1b</token>
5 <token name="@VERSION_SUFFIX@">0</token>
6 </macros>
7 <xrefs>
8 <xref type="bio.tools">mapseq</xref>
9 </xrefs>
10 <requirements>
11 <requirement type="package" version="5.26">perl</requirement> <!-- interpreter for the bundled mapseq2biom.pl script called from the command -->
12 <requirement type="package" version="@TOOL_VERSION@">mapseq</requirement>
13 </requirements>
14 <command detect_errors="exit_code"><![CDATA[
15 ## Stage the reference database under fixed names in the job working directory:
16 ## db.fasta, taxonomy.txt, db.fasta.mscluster and (only for the OTU table) db.otu.
17 #set $make_otu_table = str($ref_db.mapseq2biom.mapseq2biom) == "yes"
18
19 #if $ref_db.db_source == "cached":
20 ## Cached database: link the files found in the directory of the data table entry.
21 ln -s '${ref_db.db_cached.fields.path}'/*.fasta db.fasta &&
22 ln -s '${ref_db.db_cached.fields.path}'/*.txt taxonomy.txt &&
23 ln -s '${ref_db.db_cached.fields.path}'/*.mscluster db.fasta.mscluster &&
24 #if $make_otu_table:
25 ln -s '${ref_db.db_cached.fields.path}'/*.otu db.otu &&
26 #end if
27 #else:
28 ## Database supplied from the history.
29 ln -s '${ref_db.database}' db.fasta &&
30 ln -s '${ref_db.taxonomy}' taxonomy.txt &&
31 ## The cluster file is optional: link it only when a dataset was selected, otherwise
32 ## the path renders as the literal string None and a dangling symlink is created.
33 #if $ref_db.mscluster:
34 ln -s '${ref_db.mscluster}' db.fasta.mscluster &&
35 #end if
36 #if $make_otu_table:
37 ln -s '${ref_db.mapseq2biom.otu_table}' db.otu &&
38 #end if
39 #end if
40
41 mapseq
42 -nthreads \${GALAXY_SLOTS:-8}
43 #if str($seed) != ""
44 -seed '$seed'
45 #end if
46 -tophits '$tophits'
47 -topotus '$topotus'
48 -minscore '$minscore'
49 -minid1 '$minid1'
50 -minid2 '$minid2'
51 -otulim '$otulim'
52 -outfmt '$outfmt'
53 '$sequences' db.fasta taxonomy.txt > '$classifications'
54
55 #if $make_otu_table:
56 &&
57 perl '$__tool_directory__/mapseq2biom.pl' --otuTable db.otu --query '$classifications' --outfile '$otu_tsv' --taxid --notaxidfile '$otu_tsv_notaxid'
58 #if $ref_db.mapseq2biom.krona_input == 'yes':
59 --krona '$krona_format'
60 #end if
61 #end if
62
63 ]]></command>
64
65 <inputs>
66 <param type="data" name="sequences" format="fasta" label="Input sequences" />
67 <conditional name="ref_db">
68 <param name="db_source" type="select" label="Use cached database or database from history" help="">
69 <option value="cached">Cached database</option>
70 <option value="history">From history</option>
71 </param>
72 <when value="cached"> <!-- server-side database selected from the mapseq_db data table -->
73 <param name="db_cached" type="select" label="Using built-in mapseq DB" help="">
74 <options from_data_table="mapseq_db">
75 <column name="value" index="0" />
76 <column name="name" index="1" />
77 <column name="version" index="2" />
78 <column name="path" index="3" />
79 <filter type="sort_by" column="1"/>
80 </options>
81 <validator type="no_options" message="A built-in mapseq DB is not available. Please ask the Galaxy admins to install one on the server." />
82 </param>
83 <conditional name="mapseq2biom">
84 <param type="select" name="mapseq2biom" label="Create OTU table" help="Creates a tab-separated OTU table (including taxonomy classification) that can be used to create BIOM files">
85 <option value="yes">Yes</option>
86 <option value="no">No</option>
87 </param>
88 <when value="yes">
89 <param type="boolean" name="krona_input" truevalue="yes" falsevalue="no" label="Create taxon table for Krona" help="Generates a reads per taxon file suitable for the use with Krona" />
90 </when>
91 <when value="no" />
92 </conditional>
93 </when>
94 <when value="history"> <!-- user-supplied database files; the cluster file is optional -->
95 <param type="data" name="database" label="Database file (FASTA format)" format="fasta" />
96 <param type="data" name="taxonomy" label="Taxonomy file" format="tabular" />
97 <param type="data" name="mscluster" label="Database cluster" format="txt" optional="true" />
98 <conditional name="mapseq2biom">
99 <param type="select" name="mapseq2biom" label="Create OTU table" help="Creates a tab-separated OTU table (including taxonomy classification) that can be used to create BIOM files">
100 <option value="yes">Yes</option>
101 <option value="no">No</option>
102 </param>
103 <when value="yes">
104 <param type="data" name="otu_table" format="txt" label="OTU table" help="The OTU table produced for the taxonomies found in the reference databases that was used with MAPseq" />
105 <param type="boolean" name="krona_input" truevalue="yes" falsevalue="no" label="Create taxon table for Krona" help="Generates a reads per taxon file suitable for the use with Krona" />
106 </when>
107 <when value="no" />
108 </conditional>
109 </when>
110 </conditional>
111 <!-- The options below are forwarded one-to-one to the mapseq command line. -->
112 <param argument="-seed" type="integer" label="Fix random seed" help="Sets a fixed integer seed value for random number generation, ensuring reproducible results" optional="true"/>
113
114 <param argument="-tophits" type="integer" label="Top hits" help="Number of reference sequences to include in alignment phase"
115 value="20" min="1" max="200" />
116
117 <param argument="-topotus" type="integer" label="Top OTUs" help="Number of internal reference otus to include in alignment phase"
118 value="10" min="1" max="200" />
119
120 <param argument="-minscore" type="integer" label="Minimum score"
121 help="Minimum score cutoff to consider for a classification, should be reduced when searching very small sequences, i.e.: primer search"
122 value="30" min="1" max="50" />
123
124 <param argument="-minid1" type="integer" label="Minimum number of shared kmers (kmer search)" help="Minimum number of shared kmers to consider hit in second phase kmer search"
125 value="1" min="1" max="10" />
126
127 <param argument="-minid2" type="integer" label="Minimum number of shared kmers (alignment)" help="Minimum number of shared kmers to consider hit in alignment phase"
128 value="1" min="1" max="10" />
129
130 <param argument="-otulim" type="integer" label="OTU limit" help="Number of sequences per internal cluster to include in alignment phase"
131 value="50" min="1" max="60" />
132
133 <param argument="-outfmt" type="select" label="Output format" help="The `confidences` format outputs confidence values for each of the taxonomic levels.">
134 <option value="simple">simple</option>
135 <option value="confidences">confidences</option>
136 </param>
137 </inputs>
138
139 <outputs>
140 <data name="classifications" format="tabular" label="Classification results" /> <!-- always produced: receives the stdout of mapseq -->
141 <data name="otu_tsv" format="tabular" label="tab-output including taxIDs"> <!-- this and the outputs below exist only when the OTU table is requested -->
142 <filter>ref_db['mapseq2biom']['mapseq2biom'] == "yes"</filter>
143 </data>
144 <data name="otu_tsv_notaxid" format="tabular" label="tab-output without taxIDs">
145 <filter>ref_db['mapseq2biom']['mapseq2biom'] == "yes"</filter>
146 </data>
147 <data name="krona_format" format="tabular" label="Krona input"> <!-- additionally requires the Krona switch -->
148 <filter>ref_db['mapseq2biom']['mapseq2biom'] == "yes" and ref_db['mapseq2biom']['krona_input']</filter>
149 </data>
150 </outputs>
151
152 <tests>
153 <test expect_num_outputs="1"> <!-- history database with cluster file, no OTU table, no seed -->
154 <param name="db_source" value="history" />
155 <param name="sequences" value="sequences.fasta"/>
156 <param name="database" value="mapseq_db/LSU_trimmed.fasta"/>
157 <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt"/>
158 <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster"/>
159 <param name="mapseq2biom" value="no"/>
160 <output name="classifications" file="sequences.mapseq" sort="true"/>
161 <assert_command>
162 <has_text text="-seed" negate="true" />
163 </assert_command>
164 </test>
165 <test expect_num_outputs="1"> <!-- history database without the optional cluster file -->
166 <param name="db_source" value="history" />
167 <param name="sequences" value="sequences.fasta"/>
168 <param name="database" value="mapseq_db/LSU_trimmed.fasta"/>
169 <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt"/>
170 <param name="mapseq2biom" value="no"/>
171 <output name="classifications" file="sequences.mapseq" sort="true"/>
172 <assert_command>
173 <has_text text="-seed" negate="true" />
174 </assert_command>
175 </test>
176 <test expect_num_outputs="3"> <!-- history database, OTU table without Krona output, fixed seed -->
177 <param name="db_source" value="history" />
178 <param name="sequences" value="sequences.fasta"/>
179 <param name="database" value="mapseq_db/LSU_trimmed.fasta"/>
180 <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt"/>
181 <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster"/>
182 <param name="mapseq2biom" value="yes"/>
183 <param name="krona_input" value="no"/>
184 <param name="otu_table" value="mapseq_db/test.otu" />
185 <param name="seed" value="12" />
186 <output name="classifications" file="sequences.mapseq" sort="true"/>
187 <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
188 <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
189 <assert_command>
190 <has_text text="-seed '12'" n="1" />
191 </assert_command>
192 </test>
193 <test expect_num_outputs="4"> <!-- history database, OTU table plus Krona output, no seed -->
194 <param name="db_source" value="history" />
195 <param name="sequences" value="sequences.fasta"/>
196 <param name="database" value="mapseq_db/LSU_trimmed.fasta"/>
197 <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt"/>
198 <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster"/>
199 <param name="mapseq2biom" value="yes"/>
200 <param name="krona_input" value="yes"/>
201 <param name="otu_table" value="mapseq_db/test.otu" />
202 <output name="classifications" file="sequences.mapseq" sort="true"/>
203 <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
204 <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
205 <output name="krona_format" file="mapseq2biom/krona_input.tabular" />
206 <assert_command>
207 <has_text text="-seed" negate="true" />
208 </assert_command>
209 </test>
210 <test expect_num_outputs="1"> <!-- cached database, no OTU table, no seed -->
211 <param name="db_source" value="cached" />
212 <param name="db_cached" value="test_mapseq_db" />
213 <param name="sequences" value="sequences.fasta"/>
214 <param name="mapseq2biom" value="no"/>
215 <output name="classifications" file="sequences.mapseq" sort="true"/>
216 <assert_command>
217 <has_text text="-seed" negate="true" />
218 </assert_command>
219 </test>
220 <test expect_num_outputs="3"> <!-- cached database, OTU table without Krona output, no seed -->
221 <param name="db_source" value="cached" />
222 <param name="db_cached" value="test_mapseq_db" />
223 <param name="sequences" value="sequences.fasta"/>
224 <param name="mapseq2biom" value="yes"/>
225 <param name="krona_input" value="no"/>
226 <output name="classifications" file="sequences.mapseq" sort="true"/>
227 <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
228 <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
229 <assert_command>
230 <has_text text="-seed" negate="true" />
231 </assert_command>
232 </test>
233 <test expect_num_outputs="4"> <!-- cached database, OTU table plus Krona output, fixed seed -->
234 <param name="db_source" value="cached" />
235 <param name="db_cached" value="test_mapseq_db" />
236 <param name="sequences" value="sequences.fasta"/>
237 <param name="mapseq2biom" value="yes"/>
238 <param name="krona_input" value="yes"/>
239 <param name="seed" value="12" />
240 <output name="classifications" file="sequences.mapseq" sort="true"/>
241 <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
242 <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
243 <output name="krona_format" file="mapseq2biom/krona_input.tabular" />
244 <assert_command>
245 <has_text text="-seed '12'" n="1" />
246 </assert_command>
247 </test>
248 </tests>
249
250 <help><![CDATA[
251 MAPseq
252 ======
253 MAPseq is a set of fast and accurate sequence read classification tools
254 designed to assign taxonomy and OTU classifications to ribosomal RNA sequences.
255 This is done by using a reference set of full-length ribosomal RNA sequences
256 for which taxonomies are known, and for which a set of high-quality
257 OTU clusters has been previously generated. For each read, the best guess
258 and corresponding confidence in the assignment are shown at each taxonomic and OTU level.
259
260 Mapseq2biom
261 ===========
262 This downstream script summarizes the mapseq output as an OTU table
263 (including taxon information) as reads per OTU. This requires as input
264 an OTU to taxon mapping, for the taxonomy used to run the mapseq tool.
265
266
267 Example
268 -------
269
270 Mapseq output:
271
272 ::
273
274 # mapseq v1.2.3 (Oct 2 2018)
275 #query dbhit bitscore identity matches mismatches gaps query_start query_end dbhit_start dbhit_end strand ITS2
276 test.1 355527192 204 0.9863636493682861 217 1 2 0 218 0 220 - sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata
277 test.2 555948006 248 0.8478803038597107 340 42 19 200 582 192 593 - sk__Eukaryota;k__Fungi
278 test.4 406352048 217 0.9127272963523865 251 22 2 106 381 169 442 - sk__Eukaryota;k__Fungi;p__
279
280 OTU to taxon mapping:
281
282 ::
283
284 1 sk__Eukaryota;k__Fungi
285 2 sk__Eukaryota;k__Fungi;p__;c__;o__;f__;g__;s__uncultured_fungus
286 3 sk__Eukaryota;k__Fungi;p__Ascomycota
287 4 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales
288 5 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Astrosphaeriellaceae;g__Pithomyces
289 6 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Coniothyriaceae;g__Coniothyrium
290 7 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae
291 8 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma
292 9 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata
293 10 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymosphaeriaceae;g__Paraconiothyrium;s__Paraconiothyrium_cyclothyrioides
294
295 OTU output:
296
297 ::
298
299 # Constructed from biom file
300 # OTU ID label taxonomy
301 1 2.0 sk__Eukaryota;k__Fungi
302 9 1.0 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata
303
304 Taxon output for Krona:
305
306 ::
307
308 2 sk__Eukaryota k__Fungi
309 1 sk__Eukaryota k__Fungi p__Ascomycota c__Dothideomycetes o__Pleosporales f__Didymellaceae g__Ectophoma s__Ectophoma_multirostrata
310
311 Source
312 ------
313 * `GitHub <https://github.com/EBI-Metagenomics/pipeline-v5/blob/master/tools/RNA_prediction/mapseq2biom/mapseq2biom.pl>`_
314
315 License
316 -------
317 * `Apache-2.0 license <https://raw.githubusercontent.com/EBI-Metagenomics/pipeline-v5/master/LICENSE>`_
318 ]]></help>
319 <creator>
320 <person givenName="Rand" familyName="Zoabi" url="https://github.com/RZ9082"/>
321 <person givenName="Paul" familyName="Zierep" url="https://github.com/paulzierep"/>
322 </creator>
323 <citations>
324 <citation type="doi">
325 10.1093/bioinformatics/btx517
326 </citation>
327 <citation type="doi">
328 10.1093/nar/gkac1080
329 </citation>
330 </citations>
331 </tool>