Mercurial > repos > iuc > mapseq
diff mapseq.xml @ 0:16f561c480bb draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mapseq commit 3652500c9a0b6d92f6dc254cea7dcfcc6522d842
| author | iuc |
|---|---|
| date | Mon, 14 Oct 2024 12:27:57 +0000 |
| parents | |
| children |
line wrap: on
line diff
<tool id="mapseq" name="MAPseq" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <!--
        Galaxy wrapper for MAPseq with an optional post-processing step
        (mapseq2biom.pl, shipped in the tool directory) that turns the
        classification output into OTU tables and a Krona input table.
    -->
    <description>sequence read classification designed to assign taxonomy and OTU classifications</description>
    <macros>
        <token name="@TOOL_VERSION@">2.1.1b</token>
        <!-- Wrapper revision; increase whenever the wrapper changes without an upstream version change. -->
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <xrefs>
        <xref type="bio.tools">mapseq</xref>
    </xrefs>
    <requirements>
        <!-- perl is needed for the bundled mapseq2biom.pl script. -->
        <requirement type="package" version="5.26">perl</requirement>
        <requirement type="package" version="@TOOL_VERSION@">mapseq</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Stage the reference files under fixed names in the job directory so the
        ## mapseq and mapseq2biom calls below are identical for both database sources.
        #if $ref_db.db_source == "cached"
            ln -s '${ref_db.db_cached.fields.path}'/*.fasta db.fasta &&
            ln -s '${ref_db.db_cached.fields.path}'/*.txt taxonomy.txt &&
            ln -s '${ref_db.db_cached.fields.path}'/*.mscluster db.fasta.mscluster &&
            #if $ref_db.mapseq2biom.mapseq2biom == "yes"
                ln -s '${ref_db.db_cached.fields.path}'/*.otu db.otu &&
            #end if
        #else
            ln -s '${ref_db.database}' db.fasta &&
            ln -s '${ref_db.taxonomy}' taxonomy.txt &&
            ## The cluster file is optional: only link it when a dataset was selected,
            ## otherwise the link target would be the literal string 'None'.
            #if $ref_db.mscluster
                ln -s '${ref_db.mscluster}' db.fasta.mscluster &&
            #end if
            #if $ref_db.mapseq2biom.mapseq2biom == "yes"
                ln -s '${ref_db.mapseq2biom.otu_table}' db.otu &&
            #end if
        #end if

        mapseq
            -nthreads \${GALAXY_SLOTS:-8}
            ## The seed is an optional integer; pass the flag only when a value was entered.
            #if str($seed) != ""
                -seed '$seed'
            #end if
            -tophits '$tophits'
            -topotus '$topotus'
            -minscore '$minscore'
            -minid1 '$minid1'
            -minid2 '$minid2'
            -otulim '$otulim'
            -outfmt '$outfmt'
            '$sequences' db.fasta taxonomy.txt > '$classifications'

        ## Optional post-processing of the classification table into OTU tables.
        #if $ref_db.mapseq2biom.mapseq2biom == "yes"
            &&
            perl '$__tool_directory__/mapseq2biom.pl' --otuTable db.otu --query '$classifications' --outfile '$otu_tsv' --taxid --notaxidfile '$otu_tsv_notaxid'
            #if $ref_db.mapseq2biom.krona_input == 'yes'
                --krona '$krona_format'
            #end if
        #end if
    ]]></command>

    <inputs>
        <param type="data" name="sequences" format="fasta" label="Input sequences" />
        <!-- Reference database: either an admin-installed data table entry or datasets from the history. -->
        <conditional name="ref_db">
            <param name="db_source" type="select" label="Use cached database or database from history" help="">
                <option value="cached">Cached database</option>
                <option value="history">From history</option>
            </param>
            <when value="cached">
                <param name="db_cached" type="select" label="Using built-in mapseq DB" help="">
                    <options from_data_table="mapseq_db">
                        <column name="value" index="0" />
                        <column name="name" index="1" />
                        <column name="version" index="2" />
                        <column name="path" index="3" />
                        <filter type="sort_by" column="1" />
                    </options>
                    <validator type="no_options" message="A built-in mapseq DB is not available. Please ask the Galaxy admins to install one on the server." />
                </param>
                <conditional name="mapseq2biom">
                    <param type="select" name="mapseq2biom" label="Create OTU table" help="Creates a tab-separated OTU table (including taxonomy classification) that can be used to create BIOM files">
                        <option value="yes">Yes</option>
                        <option value="no">No</option>
                    </param>
                    <when value="yes">
                        <param type="boolean" name="krona_input" truevalue="yes" falsevalue="no" label="Create taxon table for Krona" help="Generates a reads per taxon file suitable for the use with Krona" />
                    </when>
                    <when value="no" />
                </conditional>
            </when>
            <when value="history">
                <param type="data" name="database" label="Database file (FASTA format)" format="fasta" />
                <param type="data" name="taxonomy" label="Taxonomy file" format="tabular" />
                <!-- Optional: the command section links this file only when a dataset is selected. -->
                <param type="data" name="mscluster" label="Database cluster" format="txt" optional="true" />
                <conditional name="mapseq2biom">
                    <param type="select" name="mapseq2biom" label="Create out of the MAPseq output a tab-separated output file?">
                        <option value="yes">Yes</option>
                        <option value="no">No</option>
                    </param>
                    <when value="yes">
                        <param type="data" name="otu_table" format="txt" label="OTU table" help="The OTU table produced for the taxonomies found in the reference databases that was used with MAPseq" />
                        <param type="boolean" name="krona_input" truevalue="yes" falsevalue="no" label="Create taxon table for Krona" help="Generates an output file suitable for the use with Krona" />
                    </when>
                    <when value="no" />
                </conditional>
            </when>
        </conditional>

        <param argument="-seed" type="integer" label="Fix random seed" help="Sets a fixed integer seed value for random number generation, ensuring reproducible results" optional="true" />

        <param argument="-tophits" type="integer" label="Top hits" help="Number of reference sequences to include in alignment phase"
            value="20" min="1" max="200" />

        <param argument="-topotus" type="integer" label="Top OTUs" help="Number of internal reference otus to include in alignment phase"
            value="10" min="1" max="200" />

        <param argument="-minscore" type="integer" label="Minimum score"
            help="Minimum score cutoff to consider for a classification, should be reduced when searching very small sequences, i.e.: primer search"
            value="30" min="1" max="50" />

        <param argument="-minid1" type="integer" label="Minimum number of shared kmers (kmer search)" help="Minimum number of shared kmers to consider hit in second phase kmer search"
            value="1" min="1" max="10" />

        <!-- NOTE(review): help texts for -minid2 and -otulim follow the upstream MAPseq option descriptions; confirm against the MAPseq README. -->
        <param argument="-minid2" type="integer" label="Minimum number of shared kmers (alignment phase)" help="Minimum number of shared kmers to consider hit in alignment phase"
            value="1" min="1" max="10" />

        <param argument="-otulim" type="integer" label="OTU limit" help="Number of sequences per internal cluster to include in alignment phase"
            value="50" min="1" max="60" />

        <param argument="-outfmt" type="select" label="Output format" help="The `confidences` format outputs confidence values for each of the taxonomic levels. ">
            <option value="simple">simple</option>
            <option value="confidences">confidences</option>
        </param>
    </inputs>

    <outputs>
        <data format="tabular" name="classifications" label="Classification results" />
        <!-- The three outputs below exist only when the mapseq2biom step is enabled. -->
        <data name="otu_tsv" format="tabular" label="tab-output including taxIDs">
            <filter>(ref_db['mapseq2biom']['mapseq2biom'] == "yes")</filter>
        </data>
        <data name="otu_tsv_notaxid" format="tabular" label="tab-output without taxIDs">
            <filter>(ref_db['mapseq2biom']['mapseq2biom'] == "yes")</filter>
        </data>
        <data name="krona_format" format="tabular" label="Krona input">
            <filter>(ref_db['mapseq2biom']['mapseq2biom'] == "yes" and ref_db['mapseq2biom']['krona_input'])</filter>
        </data>
    </outputs>

    <tests>
        <!-- History database with cluster file, classification only. -->
        <test expect_num_outputs="1">
            <param name="db_source" value="history" />
            <param name="sequences" value="sequences.fasta" />
            <param name="database" value="mapseq_db/LSU_trimmed.fasta" />
            <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt" />
            <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster" />
            <param name="mapseq2biom" value="no" />
            <output name="classifications" file="sequences.mapseq" sort="true" />
            <assert_command>
                <has_text text="-seed" negate="true" />
            </assert_command>
        </test>
        <!-- History database without the optional cluster file: no cluster symlink may be created. -->
        <test expect_num_outputs="1">
            <param name="db_source" value="history" />
            <param name="sequences" value="sequences.fasta" />
            <param name="database" value="mapseq_db/LSU_trimmed.fasta" />
            <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt" />
            <param name="mapseq2biom" value="no" />
            <output name="classifications" file="sequences.mapseq" sort="true" />
            <assert_command>
                <has_text text="-seed" negate="true" />
                <has_text text="db.fasta.mscluster" negate="true" />
            </assert_command>
        </test>
        <!-- History database, OTU tables without Krona table, fixed seed. -->
        <test expect_num_outputs="3">
            <param name="db_source" value="history" />
            <param name="sequences" value="sequences.fasta" />
            <param name="database" value="mapseq_db/LSU_trimmed.fasta" />
            <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt" />
            <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster" />
            <param name="mapseq2biom" value="yes" />
            <param name="krona_input" value="no" />
            <param name="otu_table" value="mapseq_db/test.otu" />
            <param name="seed" value="12" />
            <output name="classifications" file="sequences.mapseq" sort="true" />
            <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
            <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
            <assert_command>
                <has_text text="-seed '12'" n="1" />
            </assert_command>
        </test>
        <!-- History database, OTU tables plus Krona table. -->
        <test expect_num_outputs="4">
            <param name="db_source" value="history" />
            <param name="sequences" value="sequences.fasta" />
            <param name="database" value="mapseq_db/LSU_trimmed.fasta" />
            <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt" />
            <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster" />
            <param name="mapseq2biom" value="yes" />
            <param name="krona_input" value="yes" />
            <param name="otu_table" value="mapseq_db/test.otu" />
            <output name="classifications" file="sequences.mapseq" sort="true" />
            <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
            <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
            <output name="krona_format" file="mapseq2biom/krona_input.tabular" />
            <assert_command>
                <has_text text="-seed" negate="true" />
            </assert_command>
        </test>
        <!-- Cached database, classification only. -->
        <test expect_num_outputs="1">
            <param name="db_source" value="cached" />
            <param name="db_cached" value="test_mapseq_db" />
            <param name="sequences" value="sequences.fasta" />
            <param name="mapseq2biom" value="no" />
            <output name="classifications" file="sequences.mapseq" sort="true" />
            <assert_command>
                <has_text text="-seed" negate="true" />
            </assert_command>
        </test>
        <!-- Cached database, OTU tables without Krona table. -->
        <test expect_num_outputs="3">
            <param name="db_source" value="cached" />
            <param name="db_cached" value="test_mapseq_db" />
            <param name="sequences" value="sequences.fasta" />
            <param name="mapseq2biom" value="yes" />
            <param name="krona_input" value="no" />
            <output name="classifications" file="sequences.mapseq" sort="true" />
            <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
            <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
            <assert_command>
                <has_text text="-seed" negate="true" />
            </assert_command>
        </test>
        <!-- Cached database, OTU tables plus Krona table, fixed seed. -->
        <test expect_num_outputs="4">
            <param name="db_source" value="cached" />
            <param name="db_cached" value="test_mapseq_db" />
            <param name="sequences" value="sequences.fasta" />
            <param name="mapseq2biom" value="yes" />
            <param name="krona_input" value="yes" />
            <param name="seed" value="12" />
            <output name="classifications" file="sequences.mapseq" sort="true" />
            <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
            <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
            <output name="krona_format" file="mapseq2biom/krona_input.tabular" />
            <assert_command>
                <has_text text="-seed '12'" n="1" />
            </assert_command>
        </test>
    </tests>

    <help><![CDATA[
MAPseq
======
MAPseq is a set of fast and accurate sequence read classification tools
designed to assign taxonomy and OTU classifications to ribosomal RNA sequences.
This is done by using a reference set of full-length ribosomal RNA sequences
for which taxonomies are known, and for which a set of high quality
OTU clusters has been previously generated. For each read, the best guess
and corresponding confidence in the assignment is shown at each taxonomic and OTU level.

Mapseq2biom
===========
This downstream script summarises the mapseq output as an OTU table
(including taxon information) as reads per OTU. This requires as input
an OTU to taxon mapping, for the taxonomy used to run the mapseq tool.


Example
-------

Mapseq output:

::

    # mapseq v1.2.3 (Oct 2 2018)
    #query dbhit bitscore identity matches mismatches gaps query_start query_end dbhit_start dbhit_end strand ITS2
    test.1 355527192 204 0.9863636493682861 217 1 2 0 218 0 220 - sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata
    test.2 555948006 248 0.8478803038597107 340 42 19 200 582 192 593 - sk__Eukaryota;k__Fungi
    test.4 406352048 217 0.9127272963523865 251 22 2 106 381 169 442 - sk__Eukaryota;k__Fungi;p__

OTU to taxon mapping:

::

    1 sk__Eukaryota;k__Fungi
    2 sk__Eukaryota;k__Fungi;p__;c__;o__;f__;g__;s__uncultured_fungus
    3 sk__Eukaryota;k__Fungi;p__Ascomycota
    4 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales
    5 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Astrosphaeriellaceae;g__Pithomyces
    6 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Coniothyriaceae;g__Coniothyrium
    7 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae
    8 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma
    9 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata
    10 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymosphaeriaceae;g__Paraconiothyrium;s__Paraconiothyrium_cyclothyrioides

OTU output:

::

    # Constructed from biom file
    # OTU ID label taxonomy
    1 2.0 sk__Eukaryota;k__Fungi
    9 1.0 sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata

Taxon output for Krona:

::

    2 sk__Eukaryota k__Fungi
    1 sk__Eukaryota k__Fungi p__Ascomycota c__Dothideomycetes o__Pleosporales f__Didymellaceae g__Ectophoma s__Ectophoma_multirostrata

Source
------
* `GitHub <https://github.com/EBI-Metagenomics/pipeline-v5/blob/master/tools/RNA_prediction/mapseq2biom/mapseq2biom.pl>`_

License
-------
* `Apache-2.0 license <https://raw.githubusercontent.com/EBI-Metagenomics/pipeline-v5/master/LICENSE>`_
    ]]></help>
    <creator>
        <person givenName="Rand" familyName="Zoabi" url="https://github.com/RZ9082" />
        <person givenName="Paul" familyName="Zierep" url="https://github.com/paulzierep" />
    </creator>
    <citations>
        <citation type="doi">
            10.1093/bioinformatics/btx517
        </citation>
        <citation type="doi">
            10.1093/nar/gkac1080
        </citation>
    </citations>
</tool>
