Mercurial > repos > iuc > mapseq
view mapseq.xml @ 0:16f561c480bb draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mapseq commit 3652500c9a0b6d92f6dc254cea7dcfcc6522d842
| author | iuc |
|---|---|
| date | Mon, 14 Oct 2024 12:27:57 +0000 |
| parents | |
| children |
line wrap: on
line source
<tool id="mapseq" name="MAPseq" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <description>sequence read classification designed to assign taxonomy and OTU classifications</description>
    <!-- Version tokens: bump VERSION_SUFFIX for wrapper-only changes, TOOL_VERSION when the mapseq package changes. -->
    <macros>
        <token name="@TOOL_VERSION@">2.1.1b</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <xrefs>
        <xref type="bio.tools">mapseq</xref>
    </xrefs>
    <!-- perl is needed for the bundled mapseq2biom.pl post-processing script. -->
    <requirements>
        <requirement type="package" version="5.26">perl</requirement>
        <requirement type="package" version="@TOOL_VERSION@">mapseq</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
## Stage the reference files in the job directory under the fixed names
## (db.fasta, taxonomy.txt, db.fasta.mscluster, db.otu) used further below.
#if $ref_db.db_source == "cached":
    ln -s '${ref_db.db_cached.fields.path}'/*.fasta db.fasta &&
    ln -s '${ref_db.db_cached.fields.path}'/*.txt taxonomy.txt &&
    ln -s '${ref_db.db_cached.fields.path}'/*.mscluster db.fasta.mscluster &&
    #if $ref_db.mapseq2biom.mapseq2biom == "yes":
        ln -s '${ref_db.db_cached.fields.path}'/*.otu db.otu &&
    #end if
#end if
#if $ref_db.db_source == "history":
    ln -s '${ref_db.database}' db.fasta &&
    ln -s '${ref_db.taxonomy}' taxonomy.txt &&
    ## The cluster file is optional in the form. Only link it when a dataset
    ## was supplied; otherwise the link target would be the literal path 'None'.
    #if $ref_db.mscluster:
        ln -s '${ref_db.mscluster}' db.fasta.mscluster &&
    #end if
    #if $ref_db.mapseq2biom.mapseq2biom == "yes":
        ln -s '${ref_db.mapseq2biom.otu_table}' db.otu &&
    #end if
#end if

mapseq
    -nthreads \${GALAXY_SLOTS:-8}
    ## The random seed is optional; the flag is emitted only when a value was entered.
    #if str($seed) != ""
        -seed '$seed'
    #end if
    -tophits '$tophits'
    -topotus '$topotus'
    -minscore '$minscore'
    -minid1 '$minid1'
    -minid2 '$minid2'
    -otulim '$otulim'
    -outfmt '$outfmt'
    '$sequences'
    db.fasta
    taxonomy.txt
    > '$classifications'

## Optional post-processing: summarise the classifications as an OTU table.
#if $ref_db.mapseq2biom.mapseq2biom == "yes":
    &&
    perl '$__tool_directory__/mapseq2biom.pl'
        --otuTable db.otu
        --query '$classifications'
        --outfile '$otu_tsv'
        --taxid
        --notaxidfile '$otu_tsv_notaxid'
        #if $ref_db.mapseq2biom.krona_input == 'yes':
            --krona '$krona_format'
        #end if
#end if
    ]]></command>
    <inputs>
        <param type="data" name="sequences" format="fasta" label="Input sequences" />
        <!-- Reference database: either a server-side entry of the mapseq_db data table or datasets from the history. -->
        <conditional name="ref_db">
            <param name="db_source" type="select" label="Use cached database or database from history">
                <option value="cached">Cached database</option>
                <option value="history">From history</option>
            </param>
            <when value="cached">
                <param name="db_cached" type="select" label="Using built-in mapseq DB">
                    <options from_data_table="mapseq_db">
                        <column name="value" index="0" />
                        <column name="name" index="1" />
                        <column name="version" index="2" />
                        <column name="path" index="3" />
                        <filter type="sort_by" column="1" />
                    </options>
                    <validator type="no_options" message="A built-in mapseq DB is not available. Please ask the Galaxy admins to install one on the server." />
                </param>
                <conditional name="mapseq2biom">
                    <param type="select" name="mapseq2biom" label="Create OTU table" help="Creates a tab-separated OTU table (including taxonomy classification) that can be used to create BIOM files">
                        <option value="yes">Yes</option>
                        <option value="no">No</option>
                    </param>
                    <when value="yes">
                        <param type="boolean" name="krona_input" truevalue="yes" falsevalue="no" label="Create taxon table for Krona" help="Generates a reads per taxon file suitable for the use with Krona" />
                    </when>
                    <when value="no" />
                </conditional>
            </when>
            <when value="history">
                <param type="data" name="database" label="Database file (FASTA format)" format="fasta" />
                <param type="data" name="taxonomy" label="Taxonomy file" format="tabular" />
                <param type="data" name="mscluster" label="Database cluster" format="txt" optional="true" />
                <conditional name="mapseq2biom">
                    <param type="select" name="mapseq2biom" label="Create OTU table" help="Creates a tab-separated OTU table (including taxonomy classification) that can be used to create BIOM files">
                        <option value="yes">Yes</option>
                        <option value="no">No</option>
                    </param>
                    <when value="yes">
                        <param type="data" name="otu_table" format="txt" label="OTU table" help="The OTU table produced for the taxonomies found in the reference databases that was used with MAPseq" />
                        <param type="boolean" name="krona_input" truevalue="yes" falsevalue="no" label="Create taxon table for Krona" help="Generates a reads per taxon file suitable for the use with Krona" />
                    </when>
                    <when value="no" />
                </conditional>
            </when>
        </conditional>
        <param argument="-seed" type="integer" label="Fix random seed" help="Sets a fixed integer seed value for random number generation, ensuring reproducible results" optional="true" />
        <param argument="-tophits" type="integer" label="Top hits" help="Number of reference sequences to include in alignment phase" value="20" min="1" max="200" />
        <param argument="-topotus" type="integer" label="Top OTUs" help="Number of internal reference otus to include in alignment phase" value="10" min="1" max="200" />
        <param argument="-minscore" type="integer" label="Minimum score" help="Minimum score cutoff to consider for a classification, should be reduced when searching very small sequences, i.e.: primer search" value="30" min="1" max="50" />
        <param argument="-minid1" type="integer" label="Minimum number of shared kmers (kmer search phase)" help="Minimum number of shared kmers to consider hit in second phase kmer search" value="1" min="1" max="10" />
        <!-- NOTE(review): the two help texts below follow the mapseq command-line usage text; confirm against the installed version. -->
        <param argument="-minid2" type="integer" label="Minimum number of shared kmers (alignment phase)" help="Minimum number of shared kmers to consider hit in alignment phase" value="1" min="1" max="10" />
        <param argument="-otulim" type="integer" label="OTU limit" help="Number of sequences per internal cluster to include in alignment phase" value="50" min="1" max="60" />
        <param argument="-outfmt" type="select" label="Output format" help="The `confidences` format outputs confidence values for each of the taxonomic levels.">
            <option value="simple">simple</option>
            <option value="confidences">confidences</option>
        </param>
    </inputs>
    <outputs>
        <data format="tabular" name="classifications" label="Classification results" />
        <!-- The remaining outputs exist only when the OTU table step is enabled. -->
        <data name="otu_tsv" format="tabular" label="tab-output including taxIDs">
            <filter>(ref_db['mapseq2biom']['mapseq2biom'] == "yes")</filter>
        </data>
        <data name="otu_tsv_notaxid" format="tabular" label="tab-output without taxIDs">
            <filter>(ref_db['mapseq2biom']['mapseq2biom'] == "yes")</filter>
        </data>
        <data name="krona_format" format="tabular" label="Krona input">
            <filter>(ref_db['mapseq2biom']['mapseq2biom'] == "yes" and ref_db['mapseq2biom']['krona_input'])</filter>
        </data>
    </outputs>
    <tests>
        <!-- History database with cluster file, classification only -->
        <test expect_num_outputs="1">
            <param name="db_source" value="history" />
            <param name="sequences" value="sequences.fasta" />
            <param name="database" value="mapseq_db/LSU_trimmed.fasta" />
            <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt" />
            <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster" />
            <param name="mapseq2biom" value="no" />
            <output name="classifications" file="sequences.mapseq" sort="true" />
            <assert_command>
                <has_text text="-seed" negate="true" />
            </assert_command>
        </test>
        <!-- History database without the optional cluster file -->
        <test expect_num_outputs="1">
            <param name="db_source" value="history" />
            <param name="sequences" value="sequences.fasta" />
            <param name="database" value="mapseq_db/LSU_trimmed.fasta" />
            <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt" />
            <param name="mapseq2biom" value="no" />
            <output name="classifications" file="sequences.mapseq" sort="true" />
            <assert_command>
                <has_text text="-seed" negate="true" />
            </assert_command>
        </test>
        <!-- History database, OTU table without Krona output, fixed seed -->
        <test expect_num_outputs="3">
            <param name="db_source" value="history" />
            <param name="sequences" value="sequences.fasta" />
            <param name="database" value="mapseq_db/LSU_trimmed.fasta" />
            <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt" />
            <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster" />
            <param name="mapseq2biom" value="yes" />
            <param name="krona_input" value="no" />
            <param name="otu_table" value="mapseq_db/test.otu" />
            <param name="seed" value="12" />
            <output name="classifications" file="sequences.mapseq" sort="true" />
            <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
            <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
            <assert_command>
                <has_text text="-seed '12'" n="1" />
            </assert_command>
        </test>
        <!-- History database, OTU table with Krona output -->
        <test expect_num_outputs="4">
            <param name="db_source" value="history" />
            <param name="sequences" value="sequences.fasta" />
            <param name="database" value="mapseq_db/LSU_trimmed.fasta" />
            <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt" />
            <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster" />
            <param name="mapseq2biom" value="yes" />
            <param name="krona_input" value="yes" />
            <param name="otu_table" value="mapseq_db/test.otu" />
            <output name="classifications" file="sequences.mapseq" sort="true" />
            <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
            <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
            <output name="krona_format" file="mapseq2biom/krona_input.tabular" />
            <assert_command>
                <has_text text="-seed" negate="true" />
            </assert_command>
        </test>
        <!-- Cached database, classification only -->
        <test expect_num_outputs="1">
            <param name="db_source" value="cached" />
            <param name="db_cached" value="test_mapseq_db" />
            <param name="sequences" value="sequences.fasta" />
            <param name="mapseq2biom" value="no" />
            <output name="classifications" file="sequences.mapseq" sort="true" />
            <assert_command>
                <has_text text="-seed" negate="true" />
            </assert_command>
        </test>
        <!-- Cached database, OTU table without Krona output -->
        <test expect_num_outputs="3">
            <param name="db_source" value="cached" />
            <param name="db_cached" value="test_mapseq_db" />
            <param name="sequences" value="sequences.fasta" />
            <param name="mapseq2biom" value="yes" />
            <param name="krona_input" value="no" />
            <output name="classifications" file="sequences.mapseq" sort="true" />
            <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
            <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
            <assert_command>
                <has_text text="-seed" negate="true" />
            </assert_command>
        </test>
        <!-- Cached database, OTU table with Krona output, fixed seed -->
        <test expect_num_outputs="4">
            <param name="db_source" value="cached" />
            <param name="db_cached" value="test_mapseq_db" />
            <param name="sequences" value="sequences.fasta" />
            <param name="mapseq2biom" value="yes" />
            <param name="krona_input" value="yes" />
            <param name="seed" value="12" />
            <output name="classifications" file="sequences.mapseq" sort="true" />
            <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
            <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
            <output name="krona_format" file="mapseq2biom/krona_input.tabular" />
            <assert_command>
                <has_text text="-seed '12'" n="1" />
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
MAPseq
======

MAPseq is a set of fast and accurate sequence read classification tools designed to assign taxonomy and OTU classifications to ribosomal RNA sequences.
This is done by using a reference set of full-length ribosomal RNA sequences for which taxonomies are known, and for which a set of high quality OTU clusters has been previously generated.
For each read, the best guess and corresponding confidence in the assignment is shown at each taxonomic and OTU level.

Mapseq2biom
===========

This downstream script summarises the mapseq output as an OTU table (including taxon information) as reads per OTU.
This requires as input an OTU to taxon mapping, for the taxonomy used to run the mapseq tool.

Example
-------

Mapseq output:

::

    # mapseq v1.2.3 (Oct 2 2018)
    #query dbhit bitscore identity matches mismatches gaps query_start query_end dbhit_start dbhit_end strand ITS2
    test.1 355527192 204 0.9863636493682861 217 1 2 0 218 0 220 - sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata
    test.2 555948006 248 0.8478803038597107 340 42 19 200 582 192 593 - sk__Eukaryota;k__Fungi
    test.4 406352048 217 0.9127272963523865 251 22 2 106 381 169 442 - sk__Eukaryota;k__Fungi;p__

OTU to taxon mapping:

::

    1 sk__Eukaryota;k__Fungi
    2 sk__Eukaryota;k__Fungi;p__;c__;o__;f__;g__;s__uncultured_fungus
    3 sk__Eukaryota;k__Fungi;p__Ascomycota
    4 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales
    5 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Astrosphaeriellaceae;g__Pithomyces
    6 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Coniothyriaceae;g__Coniothyrium
    7 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae
    8 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma
    9 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata
    10 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymosphaeriaceae;g__Paraconiothyrium;s__Paraconiothyrium_cyclothyrioides

OTU output:

::

    # Constructed from biom file
    # OTU ID label taxonomy
    1 2.0 sk__Eukaryota;k__Fungi
    9 1.0 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata

Taxon output for Krona:

::

    2 sk__Eukaryota k__Fungi
    1 sk__Eukaryota k__Fungi p__Ascomycota c__Dothideomycetes o__Pleosporales f__Didymellaceae g__Ectophoma s__Ectophoma_multirostrata

Source
------

* `GitHub <https://github.com/EBI-Metagenomics/pipeline-v5/blob/master/tools/RNA_prediction/mapseq2biom/mapseq2biom.pl>`_

License
-------

* `Apache-2.0 license <https://raw.githubusercontent.com/EBI-Metagenomics/pipeline-v5/master/LICENSE>`_
    ]]></help>
    <creator>
        <person givenName="Rand" familyName="Zoabi" url="https://github.com/RZ9082" />
        <person givenName="Paul" familyName="Zierep" url="https://github.com/paulzierep" />
    </creator>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btx517</citation>
        <citation type="doi">10.1093/nar/gkac1080</citation>
    </citations>
</tool>
