Mercurial > repos > iuc > mapseq
comparison mapseq.xml @ 0:16f561c480bb draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mapseq commit 3652500c9a0b6d92f6dc254cea7dcfcc6522d842
| author | iuc |
|---|---|
| date | Mon, 14 Oct 2024 12:27:57 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:16f561c480bb |
|---|---|
| 1 <tool id="mapseq" name="MAPseq" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05"> | |
| 2 <description>sequence read classification designed to assign taxonomy and OTU classifications</description> | |
| 3 <macros> <!-- Tokens substituted into the tool's version attribute; @TOOL_VERSION@ also pins the mapseq package below. --> | |
| 4 <token name="@TOOL_VERSION@">2.1.1b</token> | |
| 5 <token name="@VERSION_SUFFIX@">0</token> | |
| 6 </macros> | |
| 7 <xrefs> | |
| 8 <xref type="bio.tools">mapseq</xref> | |
| 9 </xrefs> | |
| 10 <requirements> <!-- perl is needed for the mapseq2biom.pl helper that the command section runs after mapseq. --> | |
| 11 <requirement type="package" version="5.26">perl</requirement> | |
| 12 <requirement type="package" version="@TOOL_VERSION@">mapseq</requirement> | |
| 13 </requirements> | |
| 14 <command detect_errors="exit_code"><![CDATA[ | |
| 15 | |
| 16 ## Stage the reference database under the fixed names used by the mapseq and mapseq2biom calls below. | |
| 17 #if $ref_db.db_source == "cached": | |
| 18 ln -s '${ref_db.db_cached.fields.path}'/*.fasta db.fasta && | |
| 19 ln -s '${ref_db.db_cached.fields.path}'/*.txt taxonomy.txt && | |
| 20 ln -s '${ref_db.db_cached.fields.path}'/*.mscluster db.fasta.mscluster && | |
| 21 #if $ref_db.mapseq2biom.mapseq2biom == "yes": | |
| 22 ln -s '${ref_db.db_cached.fields.path}'/*.otu db.otu && | |
| 23 #end if | |
| 24 #else | |
| 25 ln -s '${ref_db.database}' db.fasta && | |
| 26 ln -s '${ref_db.taxonomy}' taxonomy.txt && | |
| 27 ## The cluster file is an optional input: only link it when a dataset was selected, | |
| 28 ## otherwise the symlink would point at a path that does not exist. | |
| 29 #if $ref_db.mscluster: | |
| 30 ln -s '${ref_db.mscluster}' db.fasta.mscluster && | |
| 31 #end if | |
| 32 #if $ref_db.mapseq2biom.mapseq2biom == "yes": | |
| 33 ln -s '${ref_db.mapseq2biom.otu_table}' db.otu && | |
| 34 #end if | |
| 35 #end if | |
| 36 | |
| 37 ## Classify the input sequences against the staged database; stdout is redirected into the | |
| 38 ## 'classifications' output. The seed option is only passed when a seed value was entered. | |
| 39 ## When an OTU table is requested, mapseq2biom.pl is run afterwards on the classifications. | |
| 40 | |
| 41 mapseq | |
| 42 -nthreads \${GALAXY_SLOTS:-8} | |
| 43 #if str($seed) != "" | |
| 44 -seed '$seed' | |
| 45 #end if | |
| 46 -tophits '$tophits' | |
| 47 -topotus '$topotus' | |
| 48 -minscore '$minscore' | |
| 49 -minid1 '$minid1' | |
| 50 -minid2 '$minid2' | |
| 51 -otulim '$otulim' | |
| 52 -outfmt '$outfmt' | |
| 53 '$sequences' db.fasta taxonomy.txt > '$classifications' | |
| 54 | |
| 55 #if $ref_db.mapseq2biom.mapseq2biom == "yes": | |
| 56 && | |
| 57 perl '$__tool_directory__/mapseq2biom.pl' --otuTable db.otu --query '$classifications' --outfile '$otu_tsv' --taxid --notaxidfile '$otu_tsv_notaxid' | |
| 58 #if $ref_db.mapseq2biom.krona_input == 'yes': | |
| 59 --krona '$krona_format' | |
| 60 #end if | |
| 61 #end if | |
| 62 | |
| 63 ]]></command> | |
| 64 | |
| 65 <inputs> | |
| 66 <param type="data" name="sequences" format="fasta" label="Input sequences" /> | |
| 67 <conditional name="ref_db"> <!-- Reference database source: a cached data table entry or datasets from the history. --> | |
| 68 <param name="db_source" type="select" label="Use cached database or database from history" help=""> | |
| 69 <option value="cached">Cached database</option> | |
| 70 <option value="history">From history</option> | |
| 71 </param> | |
| 72 <when value="cached"> | |
| 73 <param name="db_cached" type="select" label="Using built-in mapseq DB" help=""> | |
| 74 <options from_data_table="mapseq_db"> | |
| 75 <column name="value" index="0" /> | |
| 76 <column name="name" index="1" /> | |
| 77 <column name="version" index="2" /> | |
| 78 <column name="path" index="3" /> | |
| 79 <filter type="sort_by" column="1"/> | |
| 80 </options> | |
| 81 <validator type="no_options" message="A built-in mapseq DB is not available. Please ask the Galaxy admins to install one on the server." /> | |
| 82 </param> | |
| 83 <conditional name="mapseq2biom"> | |
| 84 <param type="select" name="mapseq2biom" label="Create OTU table" help="Creates a tab-separated OTU table (including taxonomy classification) that can be used to create BIOM files"> | |
| 85 <option value="yes">Yes</option> | |
| 86 <option value="no">No</option> | |
| 87 </param> | |
| 88 <when value="yes"> | |
| 89 <param type="boolean" name="krona_input" truevalue="yes" falsevalue="no" label="Create taxon table for Krona" help="Generates a reads per taxon file suitable for the use with Krona" /> | |
| 90 </when> | |
| 91 <when value="no" /> | |
| 92 </conditional> | |
| 93 </when> | |
| 94 <when value="history"> | |
| 95 <param type="data" name="database" label="Database file (FASTA format)" format="fasta" /> | |
| 96 <param type="data" name="taxonomy" label="Taxonomy file" format="tabular" /> | |
| 97 <param type="data" name="mscluster" label="Database cluster" format="txt" optional="true" /> | |
| 98 <conditional name="mapseq2biom"> <!-- Same choices as in the cached branch, plus the OTU table dataset taken from the history. --> | |
| 99 <param type="select" name="mapseq2biom" label="Create OTU table" help="Creates a tab-separated OTU table (including taxonomy classification) that can be used to create BIOM files"> | |
| 100 <option value="yes">Yes</option> | |
| 101 <option value="no">No</option> | |
| 102 </param> | |
| 103 <when value="yes"> | |
| 104 <param type="data" name="otu_table" format="txt" label="OTU table" help="The OTU table produced for the taxonomies found in the reference databases that was used with MAPseq" /> | |
| 105 <param type="boolean" name="krona_input" truevalue="yes" falsevalue="no" label="Create taxon table for Krona" help="Generates a reads per taxon file suitable for the use with Krona" /> | |
| 106 </when> | |
| 107 <when value="no" /> | |
| 108 </conditional> | |
| 109 </when> | |
| 110 </conditional> | |
| 111 | |
| 112 <param argument="-seed" type="integer" label="Fix random seed" help="Sets a fixed integer seed value for random number generation, ensuring reproducible results" optional="true"/> | |
| 113 <!-- Search and alignment tuning options; each value is passed to mapseq under its argument name. --> | |
| 114 <param argument="-tophits" type="integer" label="Top hits" help="Number of reference sequences to include in alignment phase" | |
| 115 value="20" min="1" max="200" /> | |
| 116 | |
| 117 <param argument="-topotus" type="integer" label="Top OTUs" help="Number of internal reference otus to include in alignment phase" | |
| 118 value="10" min="1" max="200" /> | |
| 119 | |
| 120 <param argument="-minscore" type="integer" label="Minimum score" | |
| 121 help="Minimum score cutoff to consider for a classification, should be reduced when searching very small sequences, i.e.: primer search" | |
| 122 value="30" min="1" max="50" /> | |
| 123 | |
| 124 <param argument="-minid1" type="integer" label="Minimum number of shared kmers" help="Minimum number of shared kmers to consider hit in second phase kmer search" | |
| 125 value="1" min="1" max="10" /> | |
| 126 | |
| 127 <param argument="-minid2" type="integer" label="Minimum number of shared kmers (alignment phase)" help="Minimum number of shared kmers to consider hit in alignment phase" | |
| 128 value="1" min="1" max="10" /> | |
| 129 | |
| 130 <param argument="-otulim" type="integer" label="OTU limit" help="Number of sequences per internal cluster to include in alignment phase" | |
| 131 value="50" min="1" max="60" /> | |
| 132 | |
| 133 <param argument="-outfmt" type="select" label="Output format" help="The `confidences` format outputs confidence values for each of the taxonomic levels."> | |
| 134 <option value="simple">simple</option> | |
| 135 <option value="confidences">confidences</option> | |
| 136 </param> | |
| 137 </inputs> | |
| 138 | |
| 139 <outputs> <!-- classifications is always produced; the other datasets depend on the OTU table and Krona choices. --> | |
| 140 <data name="classifications" format="tabular" label="Classification results" /> | |
| 141 <data name="otu_tsv" format="tabular" label="tab-output including taxIDs"> | |
| 142 <filter>ref_db['mapseq2biom']['mapseq2biom'] == "yes"</filter> | |
| 143 </data> | |
| 144 <data name="otu_tsv_notaxid" format="tabular" label="tab-output without taxIDs"> | |
| 145 <filter>ref_db['mapseq2biom']['mapseq2biom'] == "yes"</filter> | |
| 146 </data> | |
| 147 <data name="krona_format" format="tabular" label="Krona input"> | |
| 148 <filter>ref_db['mapseq2biom']['mapseq2biom'] == "yes" and ref_db['mapseq2biom']['krona_input']</filter> | |
| 149 </data> | |
| 150 </outputs> | |
| 151 | |
| 152 <tests> | |
| 153 <test expect_num_outputs="1"> <!-- History database with a cluster file, no OTU table: only the classifications output is expected. --> | |
| 154 <param name="db_source" value="history" /> | |
| 155 <param name="sequences" value="sequences.fasta"/> | |
| 156 <param name="database" value="mapseq_db/LSU_trimmed.fasta"/> | |
| 157 <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt"/> | |
| 158 <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster"/> | |
| 159 <param name="mapseq2biom" value="no"/> | |
| 160 <output name="classifications" file="sequences.mapseq" sort="true"/> | |
| 161 <assert_command> | |
| 162 <has_text text="-seed" negate="true" /> | |
| 163 </assert_command> | |
| 164 </test> | |
| 165 <test expect_num_outputs="1"> <!-- Same as the previous test, but the optional mscluster dataset is not supplied. --> | |
| 166 <param name="db_source" value="history" /> | |
| 167 <param name="sequences" value="sequences.fasta"/> | |
| 168 <param name="database" value="mapseq_db/LSU_trimmed.fasta"/> | |
| 169 <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt"/> | |
| 170 <param name="mapseq2biom" value="no"/> | |
| 171 <output name="classifications" file="sequences.mapseq" sort="true"/> | |
| 172 <assert_command> | |
| 173 <has_text text="-seed" negate="true" /> | |
| 174 </assert_command> | |
| 175 </test> | |
| 176 <test expect_num_outputs="3"> <!-- History database with OTU table, Krona output off, seed 12: the seed must appear exactly once in the command. --> | |
| 177 <param name="db_source" value="history" /> | |
| 178 <param name="sequences" value="sequences.fasta"/> | |
| 179 <param name="database" value="mapseq_db/LSU_trimmed.fasta"/> | |
| 180 <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt"/> | |
| 181 <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster"/> | |
| 182 <param name="mapseq2biom" value="yes"/> | |
| 183 <param name="krona_input" value="no"/> | |
| 184 <param name="otu_table" value="mapseq_db/test.otu" /> | |
| 185 <param name="seed" value="12" /> | |
| 186 <output name="classifications" file="sequences.mapseq" sort="true"/> | |
| 187 <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" /> | |
| 188 <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" /> | |
| 189 <assert_command> | |
| 190 <has_text text="-seed '12'" n="1" /> | |
| 191 </assert_command> | |
| 192 </test> | |
| 193 <test expect_num_outputs="4"> <!-- History database with OTU table and Krona output: four outputs, no seed on the command line. --> | |
| 194 <param name="db_source" value="history" /> | |
| 195 <param name="sequences" value="sequences.fasta"/> | |
| 196 <param name="database" value="mapseq_db/LSU_trimmed.fasta"/> | |
| 197 <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt"/> | |
| 198 <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster"/> | |
| 199 <param name="mapseq2biom" value="yes"/> | |
| 200 <param name="krona_input" value="yes"/> | |
| 201 <param name="otu_table" value="mapseq_db/test.otu" /> | |
| 202 <output name="classifications" file="sequences.mapseq" sort="true"/> | |
| 203 <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" /> | |
| 204 <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" /> | |
| 205 <output name="krona_format" file="mapseq2biom/krona_input.tabular" /> | |
| 206 <assert_command> | |
| 207 <has_text text="-seed" negate="true" /> | |
| 208 </assert_command> | |
| 209 </test> | |
| 210 <test expect_num_outputs="1"> <!-- Cached database (data table entry test_mapseq_db), no OTU table. --> | |
| 211 <param name="db_source" value="cached" /> | |
| 212 <param name="db_cached" value="test_mapseq_db" /> | |
| 213 <param name="sequences" value="sequences.fasta"/> | |
| 214 <param name="mapseq2biom" value="no"/> | |
| 215 <output name="classifications" file="sequences.mapseq" sort="true"/> | |
| 216 <assert_command> | |
| 217 <has_text text="-seed" negate="true" /> | |
| 218 </assert_command> | |
| 219 </test> | |
| 220 <test expect_num_outputs="3"> <!-- Cached database with OTU table, Krona output off. --> | |
| 221 <param name="db_source" value="cached" /> | |
| 222 <param name="db_cached" value="test_mapseq_db" /> | |
| 223 <param name="sequences" value="sequences.fasta"/> | |
| 224 <param name="mapseq2biom" value="yes"/> | |
| 225 <param name="krona_input" value="no"/> | |
| 226 <output name="classifications" file="sequences.mapseq" sort="true"/> | |
| 227 <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" /> | |
| 228 <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" /> | |
| 229 <assert_command> | |
| 230 <has_text text="-seed" negate="true" /> | |
| 231 </assert_command> | |
| 232 </test> | |
| 233 <test expect_num_outputs="4"> <!-- Cached database with OTU table, Krona output and seed 12. --> | |
| 234 <param name="db_source" value="cached" /> | |
| 235 <param name="db_cached" value="test_mapseq_db" /> | |
| 236 <param name="sequences" value="sequences.fasta"/> | |
| 237 <param name="mapseq2biom" value="yes"/> | |
| 238 <param name="krona_input" value="yes"/> | |
| 239 <param name="seed" value="12" /> | |
| 240 <output name="classifications" file="sequences.mapseq" sort="true"/> | |
| 241 <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" /> | |
| 242 <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" /> | |
| 243 <output name="krona_format" file="mapseq2biom/krona_input.tabular" /> | |
| 244 <assert_command> | |
| 245 <has_text text="-seed '12'" n="1" /> | |
| 246 </assert_command> | |
| 247 </test> | |
| 248 </tests> | |
| 249 | |
| 250 <help><![CDATA[ | |
| 251 MAPseq | |
| 252 ====== | |
| 253 MAPseq is a set of fast and accurate sequence read classification tools | |
| 254 designed to assign taxonomy and OTU classifications to ribosomal RNA sequences. | |
| 255 This is done by using a reference set of full-length ribosomal RNA sequences | |
| 256 for which taxonomies are known, and for which a set of high quality | |
| 257 OTU clusters has been previously generated. For each read, the best guess | |
| 258 and corresponding confidence in the assignment is shown at each taxonomic and OTU level. | |
| 259 | |
| 260 Mapseq2biom | |
| 261 =========== | |
| 262 This downstream script summarises the mapseq output as an OTU table | |
| 263 (including taxon information) as reads per OTU. This requires as input | |
| 264 an OTU to taxon mapping, for the taxonomy used to run the mapseq tool. | |
| 265 | |
| 266 | |
| 267 Example | |
| 268 ------- | |
| 269 | |
| 270 Mapseq output: | |
| 271 | |
| 272 :: | |
| 273 | |
| 274 # mapseq v1.2.3 (Oct 2 2018) | |
| 275 #query dbhit bitscore identity matches mismatches gaps query_start query_end dbhit_start dbhit_end strand ITS2 | |
| 276 test.1 355527192 204 0.9863636493682861 217 1 2 0 218 0 220 - sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata | |
| 277 test.2 555948006 248 0.8478803038597107 340 42 19 200 582 192 593 - sk__Eukaryota;k__Fungi | |
| 278 test.4 406352048 217 0.9127272963523865 251 22 2 106 381 169 442 - sk__Eukaryota;k__Fungi;p__ | |
| 279 | |
| 280 OTU to taxon mapping: | |
| 281 | |
| 282 :: | |
| 283 | |
| 284 1 sk__Eukaryota;k__Fungi | |
| 285 2 sk__Eukaryota;k__Fungi;p__;c__;o__;f__;g__;s__uncultured_fungus | |
| 286 3 sk__Eukaryota;k__Fungi;p__Ascomycota | |
| 287 4 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales | |
| 288 5 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Astrosphaeriellaceae;g__Pithomyces | |
| 289 6 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Coniothyriaceae;g__Coniothyrium | |
| 290 7 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae | |
| 291 8 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma | |
| 292 9 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata | |
| 293 10 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymosphaeriaceae;g__Paraconiothyrium;s__Paraconiothyrium_cyclothyrioides | |
| 294 | |
| 295 OTU output: | |
| 296 | |
| 297 :: | |
| 298 | |
| 299 # Constructed from biom file | |
| 300 # OTU ID label taxonomy | |
| 301 1 2.0 sk__Eukaryota;k__Fungi | |
| 302 9 1.0 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata | |
| 303 | |
| 304 Taxon output for Krona: | |
| 305 | |
| 306 :: | |
| 307 | |
| 308 2 sk__Eukaryota k__Fungi | |
| 309 1 sk__Eukaryota k__Fungi p__Ascomycota c__Dothideomycetes o__Pleosporales f__Didymellaceae g__Ectophoma s__Ectophoma_multirostrata | |
| 310 | |
| 311 Source | |
| 312 ------ | |
| 313 * `GitHub <https://github.com/EBI-Metagenomics/pipeline-v5/blob/master/tools/RNA_prediction/mapseq2biom/mapseq2biom.pl>`_ | |
| 314 | |
| 315 License | |
| 316 ------- | |
| 317 * `Apache-2.0 license <https://raw.githubusercontent.com/EBI-Metagenomics/pipeline-v5/master/LICENSE>`_ | |
| 318 ]]></help> | |
| 319 <creator> <!-- People credited as creators of this tool wrapper. --> | |
| 320 <person givenName="Rand" familyName="Zoabi" url="https://github.com/RZ9082"/> | |
| 321 <person givenName="Paul" familyName="Zierep" url="https://github.com/paulzierep"/> | |
| 322 </creator> | |
| 323 <citations> <!-- Publications to cite, given as DOIs. --> | |
| 324 <citation type="doi"> | |
| 325 10.1093/bioinformatics/btx517 | |
| 326 </citation> | |
| 327 <citation type="doi"> | |
| 328 10.1093/nar/gkac1080 | |
| 329 </citation> | |
| 330 </citations> | |
| 331 </tool> |
