Mercurial > repos > iuc > mapseq

diff mapseq.xml @ 0:16f561c480bb draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mapseq commit 3652500c9a0b6d92f6dc254cea7dcfcc6522d842
author: iuc
date: Mon, 14 Oct 2024 12:27:57 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mapseq.xml	Mon Oct 14 12:27:57 2024 +0000
@@ -0,0 +1,331 @@
+<tool id="mapseq" name="MAPseq" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
+    <description>sequence read classification designed to assign taxonomy and OTU classifications</description>
+    <macros>
+        <token name="@TOOL_VERSION@">2.1.1b</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <xrefs>
+        <xref type="bio.tools">mapseq</xref>
+    </xrefs>
+    <requirements>
+        <requirement type="package" version="5.26">perl</requirement>
+        <requirement type="package" version="@TOOL_VERSION@">mapseq</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+
+        #if $ref_db.db_source == "cached" and $ref_db.mapseq2biom.mapseq2biom == "no":
+            ln -s '${ref_db.db_cached.fields.path}'/*.fasta db.fasta &&
+            ln -s '${ref_db.db_cached.fields.path}'/*.txt taxonomy.txt &&
+            ln -s '${ref_db.db_cached.fields.path}'/*.mscluster db.fasta.mscluster &&
+        #end if
+
+        #if $ref_db.db_source == "cached" and $ref_db.mapseq2biom.mapseq2biom == "yes":
+            ln -s '${ref_db.db_cached.fields.path}'/*.fasta db.fasta &&
+            ln -s '${ref_db.db_cached.fields.path}'/*.txt taxonomy.txt &&
+            ln -s '${ref_db.db_cached.fields.path}'/*.mscluster db.fasta.mscluster &&
+            ln -s '${ref_db.db_cached.fields.path}'/*.otu db.otu &&
+        #end if
+
+        #if $ref_db.db_source == "history" and $ref_db.mapseq2biom.mapseq2biom == "no":
+            ln -s '${ref_db.database}' db.fasta &&
+            ln -s '${ref_db.taxonomy}' taxonomy.txt &&
+            ln -s '${ref_db.mscluster}' db.fasta.mscluster &&
+        #end if
+
+        #if $ref_db.db_source == "history" and $ref_db.mapseq2biom.mapseq2biom == "yes":
+            ln -s '${ref_db.database}' db.fasta &&
+            ln -s '${ref_db.taxonomy}' taxonomy.txt &&
+            ln -s '${ref_db.mscluster}' db.fasta.mscluster &&
+            ln -s '${ref_db.mapseq2biom.otu_table}' db.otu &&
+        #end if
+        mapseq
+        -nthreads \${GALAXY_SLOTS:-8}
+        #if str($seed) != ""
+            -seed '$seed'
+        #end if
+        -tophits '$tophits'
+        -topotus '$topotus'
+        -minscore '$minscore'
+        -minid1 '$minid1'
+        -minid2 '$minid2'
+        -otulim '$otulim'
+        -outfmt '$outfmt'
+        '$sequences' db.fasta taxonomy.txt > '$classifications'
+
+        #if $ref_db.mapseq2biom.mapseq2biom == "yes":
+            &&
+            perl '$__tool_directory__/mapseq2biom.pl' --otuTable db.otu --query '$classifications' --outfile '$otu_tsv' --taxid --notaxidfile '$otu_tsv_notaxid' 
+            #if $ref_db.mapseq2biom.krona_input == 'yes':
+                --krona '$krona_format'
+            #end if
+        #end if
+
+    ]]></command>
+
+    <inputs>
+        <param type="data" name="sequences" format="fasta" label="Input sequences" />
+        <conditional name="ref_db">
+            <param name="db_source" type="select" label="Use cached database or database from history" help="">
+                <option value="cached">Cached database</option>
+                <option value="history">From history</option>
+            </param>
+            <when value="cached">
+                <param name="db_cached" type="select" label="Using built-in mapseq DB" help="">
+                    <options from_data_table="mapseq_db">
+                        <column name="value" index="0" />
+                        <column name="name" index="1" />
+                        <column name="version" index="2" />
+                        <column name="path" index="3" />
+                        <filter type="sort_by" column="1"/>
+                    </options>
+                    <validator type="no_options" message="A built-in mapseq DB is not available. Please ask the Galaxy admins to install one on the server." />
+                </param>
+                <conditional name="mapseq2biom">
+                    <param type="select" name="mapseq2biom" label="Create OTU table" help="Creates a tab-separated OTU table (including taxonomy classification) that can be used to create BIOM files">
+                        <option value="yes">Yes</option>
+                        <option value="no">No</option>
+                    </param>
+                    <when value="yes">
+                        <param type="boolean" name="krona_input" truevalue="yes" falsevalue="no" label="Create taxon table for Krona" help="Generates a reads per taxon file suitable for the use with Krona" />
+                    </when>
+                    <when value="no" />
+                </conditional>
+            </when>
+            <when value="history">
+                <param type="data" name="database" label="Database file (FASTA format)" format="fasta" />
+                <param type="data" name="taxonomy" label="Taxonomy file" format="tabular" />
+                <param type="data" name="mscluster" label="Database cluster" format="txt" optional="true" />
+                <conditional name="mapseq2biom">
+                    <param type="select" name="mapseq2biom" label="Create out of the MAPseq output a tab-separated output file?">
+                        <option value="yes">Yes</option>
+                        <option value="no">No</option>
+                    </param>
+                    <when value="yes">
+                        <param type="data" name="otu_table" format="txt" label="OTU table" help="The OTU table produced for the taxonomies found in the reference databases that was used with MAPseq" />
+                        <param type="boolean" name="krona_input" truevalue="yes" falsevalue="no" help="Generates an output file suitable for the use with Krona" />
+                    </when>
+                    <when value="no" />
+                </conditional>
+            </when>
+        </conditional>
+               
+        <param argument="-seed" type="integer" label="Fix random seed" help="Sets a fixed integer seed value for random number generation, ensuring reproducible results" optional="true"/>
+
+        <param argument="-tophits" type="integer" label="Top hits" help="Number of reference sequences to include in alignment phase"
+                value="20" min="1" max="200" />
+               
+        <param argument="-topotus" type="integer" label="Top OTUs" help="Number of internal reference otus to include in alignment phase"
+                value="10" min="1" max="200" />
+               
+        <param argument="-minscore" type="integer" label="Minimum score" 
+                help="Minimum score cutoff to consider for a classification, should be reduced when searching very small sequences, i.e.: primer search"
+                value="30" min="1" max="50" />
+               
+        <param argument="-minid1" type="integer" label="Minimum number of shared kmers" help="Minimum number of shared kmers to consider hit in second phase kmer search" 
+                value="1" min="1" max="10" />
+               
+        <param argument="-minid2" type="integer" label="Number of ref. sequences" help="Number of reference sequences to include in alignment phase"
+                value="1" min="1" max="10" />
+               
+        <param argument="-otulim" type="integer" label="OTU limit" help="Minimum number of shared kmers to consider hit in alignment phase" 
+                value="50" min="1" max="60" />
+
+        <param argument="-outfmt" type="select" label="Output format" help="The `confidences` format outputs confidence values for each of the taxonomic levels. ">
+            <option value="simple">simple</option>
+            <option value="confidences">confidences</option>
+        </param>
+    </inputs>
+
+    <outputs>
+        <data format="tabular" name="classifications" label="Classification results"/>
+        <data name="otu_tsv" format="tabular" label="tab-output including taxIDs">
+            <filter>(ref_db['mapseq2biom']['mapseq2biom'] == "yes")</filter>
+        </data>
+        <data name="otu_tsv_notaxid" format="tabular" label="tab-output without taxIDs" >
+            <filter>(ref_db['mapseq2biom']['mapseq2biom'] == "yes")</filter>
+        </data>
+        <data name="krona_format" format="tabular" label="Krona input" >
+            <filter>(ref_db['mapseq2biom']['mapseq2biom'] == "yes" and ref_db['mapseq2biom']['krona_input'])</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="db_source" value="history" />
+            <param name="sequences" value="sequences.fasta"/>
+            <param name="database" value="mapseq_db/LSU_trimmed.fasta"/>
+            <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt"/>
+            <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster"/>
+            <param name="mapseq2biom" value="no"/>
+            <output name="classifications" file="sequences.mapseq" sort="true"/>
+            <assert_command>
+                <has_text text="-seed" negate="true" />
+            </assert_command>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="db_source" value="history" />
+            <param name="sequences" value="sequences.fasta"/>
+            <param name="database" value="mapseq_db/LSU_trimmed.fasta"/>
+            <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt"/>
+            <param name="mapseq2biom" value="no"/>
+            <output name="classifications" file="sequences.mapseq" sort="true"/>
+            <assert_command>
+                <has_text text="-seed" negate="true" />
+            </assert_command>
+        </test>
+        <test expect_num_outputs="3">
+            <param name="db_source" value="history" />
+            <param name="sequences" value="sequences.fasta"/>
+            <param name="database" value="mapseq_db/LSU_trimmed.fasta"/>
+            <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt"/>
+            <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster"/>
+            <param name="mapseq2biom" value="yes"/>
+            <param name="krona_input" value="no"/>
+            <param name="otu_table" value="mapseq_db/test.otu" />
+            <param name="seed" value="12" />
+            <output name="classifications" file="sequences.mapseq" sort="true"/>
+            <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
+            <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
+            <assert_command>
+                <has_text text="-seed '12'" n="1" />
+            </assert_command>
+        </test>
+        <test expect_num_outputs="4">
+            <param name="db_source" value="history" />
+            <param name="sequences" value="sequences.fasta"/>
+            <param name="database" value="mapseq_db/LSU_trimmed.fasta"/>
+            <param name="taxonomy" value="mapseq_db/slv_lsu_filtered2_trimmed.txt"/>
+            <param name="mscluster" value="mapseq_db/LSU_trimmed.fasta.mscluster"/>
+            <param name="mapseq2biom" value="yes"/>
+            <param name="krona_input" value="yes"/>
+            <param name="otu_table" value="mapseq_db/test.otu" />
+            <output name="classifications" file="sequences.mapseq" sort="true"/>
+            <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
+            <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
+            <output name="krona_format" file="mapseq2biom/krona_input.tabular" />
+            <assert_command>
+                <has_text text="-seed" negate="true" />
+            </assert_command>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="db_source" value="cached" />
+            <param name="db_cached" value="test_mapseq_db" />
+            <param name="sequences" value="sequences.fasta"/>
+            <param name="mapseq2biom" value="no"/>
+            <output name="classifications" file="sequences.mapseq" sort="true"/>
+            <assert_command>
+                <has_text text="-seed" negate="true" />
+            </assert_command>
+        </test>
+        <test expect_num_outputs="3">
+            <param name="db_source" value="cached" />
+            <param name="db_cached" value="test_mapseq_db" />
+            <param name="sequences" value="sequences.fasta"/>
+            <param name="mapseq2biom" value="yes"/>
+            <param name="krona_input" value="no"/>
+            <output name="classifications" file="sequences.mapseq" sort="true"/>
+            <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
+            <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
+            <assert_command>
+                <has_text text="-seed" negate="true" />
+            </assert_command>
+        </test>
+        <test expect_num_outputs="4">
+            <param name="db_source" value="cached" />
+            <param name="db_cached" value="test_mapseq_db" />
+            <param name="sequences" value="sequences.fasta"/>
+            <param name="mapseq2biom" value="yes"/>
+            <param name="krona_input" value="yes"/>
+            <param name="seed" value="12" />
+            <output name="classifications" file="sequences.mapseq" sort="true"/>
+            <output name="otu_tsv" file="mapseq2biom/tab-output_including_taxIDs.tabular" />
+            <output name="otu_tsv_notaxid" file="mapseq2biom/tab-output_without_taxIDs.tabular" />
+            <output name="krona_format" file="mapseq2biom/krona_input.tabular" />
+            <assert_command>
+                <has_text text="-seed '12'" n="1" />
+            </assert_command>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+MAPseq
+======
+MAPseq is a set of fast and accurate sequence read classification tools 
+designed to assign taxonomy and OTU classifications to ribosomal RNA sequences. 
+This is done by using a reference set of full-length ribosomal RNA sequences
+for which known taxonomies are known, and for which a set of high quality
+OTU clusters has been previously generated. For each read, the best guess
+and correspoding confidence in the assignment is shown at each taxonomic and OTU level.
+
+Mapseq2biom
+===========
+This downstream script summaries the mapseq output as an OTU table 
+(including taxon information) as reads per OTU. This requires as input 
+an OTU to taxon mapping, for the taxonomy used to run the mapseq tool.
+
+
+Example
+-------
+
+Mapseq output:
+
+::
+
+	# mapseq v1.2.3 (Oct  2 2018)
+	#query	dbhit	bitscore	identity	matches	mismatches	gaps	query_start	query_end	dbhit_start	dbhit_end	strand		ITS2
+	test.1	355527192	204	0.9863636493682861	217	1	2	0	218	0	220	-		sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata
+	test.2	555948006	248	0.8478803038597107	340	42	19	200	582	192	593	-		sk__Eukaryota;k__Fungi
+	test.4	406352048	217	0.9127272963523865	251	22	2	106	381	169	442	-		sk__Eukaryota;k__Fungi;p__
+
+OTU to taxon mapping:
+
+::
+
+	1	sk__Eukaryota;k__Fungi
+	2	sk__Eukaryota;k__Fungi;p__;c__;o__;f__;g__;s__uncultured_fungus
+	3	sk__Eukaryota;k__Fungi;p__Ascomycota
+	4	sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales
+	5	sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Astrosphaeriellaceae;g__Pithomyces
+	6	sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Coniothyriaceae;g__Coniothyrium
+	7	sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae
+	8	sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma
+	9	sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata
+	10	sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymosphaeriaceae;g__Paraconiothyrium;s__Paraconiothyrium_cyclothyrioides
+
+OTU output:
+
+::
+
+	# Constructed from biom file
+	# OTU ID	label	taxonomy
+	1	2.0	sk__Eukaryota;k__Fungi
+	9	1.0	sk__Eukaryota;k__Fungi;p__Ascomycota;c__Dothideomycetes;o__Pleosporales;f__Didymellaceae;g__Ectophoma;s__Ectophoma_multirostrata
+
+Taxon output for Krona:
+
+::
+
+	2	sk__Eukaryota	k__Fungi
+	1	sk__Eukaryota	k__Fungi	p__Ascomycota	c__Dothideomycetes	o__Pleosporales	f__Didymellaceae	g__Ectophoma	s__Ectophoma_multirostrata
+
+Source
+------
+* `GitHub <https://github.com/EBI-Metagenomics/pipeline-v5/blob/master/tools/RNA_prediction/mapseq2biom/mapseq2biom.pl>`_
+
+License
+-------
+* `Apache-2.0 license <https://raw.githubusercontent.com/EBI-Metagenomics/pipeline-v5/master/LICENSE>`_
+    ]]></help>
+    <creator>
+        <person givenName="Rand" familyName="Zoabi" url="https://github.com/RZ9082"/>
+        <person givenName="Paul" familyName="Zierep" url="https://github.com/paulzierep"/>
+    </creator>
+    <citations>
+        <citation type="doi">
+            10.1093/bioinformatics/btx517
+        </citation>
+        <citation type="doi">
+            10.1093/nar/gkac1080
+        </citation>
+    </citations>
+</tool>
\ No newline at end of file