Mercurial > repos > iuc > mmseqs2_taxonomy_assignment

diff mmseqs2_taxonomy_assignment.xml @ 0:dbd5e2791f49 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mmsesq2 commit 25b603890e8048ac7bbffb81eaaa440f87049521
author: iuc
date: Thu, 27 Mar 2025 16:42:24 +0000
children: 96b969571420
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mmseqs2_taxonomy_assignment.xml	Thu Mar 27 16:42:24 2025 +0000
@@ -0,0 +1,618 @@
+<tool id="mmseqs2_taxonomy_assignment" name="MMseqs2 Taxonomy Assignments" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>
+        of sequences by comparing them to a reference database
+    </description>
+    <macros>
+        <import>macro.xml</import>
+    </macros>
+    <expand macro="biotools"/>
+    <expand macro="requirements"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+ln -s -f '${createdb.input_fasta}' 'input' &&
+mmseqs createdb
+    'input'
+    'sequenceDB'
+    --dbtype '$createdb.alph_type.dbtype'
+    --shuffle $createdb.shuffle &&
+
+cp -r '$createtaxdb.database_type.mmseqs2_db_select.fields.path'/database* . &&
+
+mmseqs createtaxdb
+    database
+    'tmp'
+    #if $createtaxdb.tax_mapping_file
+    --tax-mapping-file '$createtaxdb.tax_mapping_file'
+    #end if
+    --tax-mapping-mode '$createtaxdb.tax_mapping_mode'
+    --threads "\${GALAXY_SLOTS:-1}" &&
+
+#if $filtertaxseqdb.taxon_list
+    mmseqs filtertaxseqdb
+        'database'
+        'database_filtered'
+        --taxon-list '$filtertaxseqdb.taxon_list'
+            &&
+#end if
+
+mmseqs taxonomy
+    'sequenceDB'
+    #if $filtertaxseqdb.taxon_list
+        'database_filtered'
+    #else 
+        'database'
+    #end if
+    'output_taxonomy'
+    'tmp'
+    #if str($createdb.alph_type.dbtype) == "1"
+        --comp-bias-corr-scale $createdb.alph_type.comp_bias_corr_scale
+    #elif str($createdb.alph_type.dbtype) == "2"
+        --zdrop $createdb.alph_type.zdrop
+    #end if
+    ##Pre-filter options
+    --add-self-matches $taxonomy.prefilter.add_self_matches
+    -s $taxonomy.prefilter.sensitivity
+    -k $taxonomy.prefilter.kmer_length
+    --target-search-mode $taxonomy.prefilter.target_search_mode
+    ##--k-score TWIN                   k-mer threshold for generating similar k-mer lists [seq:2147483647,prof:2147483647]
+    --max-seqs $taxonomy.prefilter.max_seqs
+    --split $taxonomy.prefilter.split
+    --split-mode $taxonomy.prefilter.split_mode
+    ##--split-memory-limit BYTE        Set max memory per split. E.g. 800B, 5K, 10M, 1G. Default (0) to all available system memory [0]
+    --diag-score $taxonomy.prefilter.diag_score
+    --exact-kmer-matching $taxonomy.prefilter.exact_kmer_matching
+    --mask $taxonomy.prefilter.mask
+    --mask-prob $taxonomy.prefilter.mask_prob
+    --mask-lower-case $taxonomy.prefilter.mask_lower_case
+    --min-ungapped-score $taxonomy.prefilter.min_ungapped_score
+    --spaced-kmer-mode $taxonomy.prefilter.spaced_kmer_mode
+    ##--spaced-kmer-pattern STR        User-specified spaced k-mer pattern []
+    ##--local-tmp STR                  Path where some of the temporary files will be created []
+    ##--disk-space-limit BYTE          Set max disk space to use for reverse profile searches. E.g. 800B, 5K, 10M, 1G. Default (0) to all available disk space in the temp folder [0]
+    
+    ##Align options
+    -a $taxonomy.align.convertalis
+    ##The next 2 parameters seems to be the same
+    --alignment-mode $taxonomy.align.alignment_mode 
+    --alignment-output-mode $taxonomy.align.alignment_output_mode 
+    --wrapped-scoring $taxonomy.align.wrapped_scoring
+    -e $taxonomy.align.evalue
+    --min-seq-id $taxonomy.align.min_seq_id
+    --min-aln-len $taxonomy.align.min_aln_len
+    --seq-id-mode $taxonomy.align.seq_id_mode
+    --alt-ali $taxonomy.align.alt_ali
+    -c $taxonomy.align.cov
+    --cov-mode $taxonomy.align.cov_mode
+    --max-rejected $taxonomy.align.max_rejected
+    --max-accept $taxonomy.align.max_accept
+    --score-bias $taxonomy.align.score_bias
+    --realign $taxonomy.align.realign
+    --realign-score-bias $taxonomy.align.realign_score_bias
+    --realign-max-seqs $taxonomy.align.realign_max_seqs
+    --corr-score-weight $taxonomy.align.corr_score_weight
+    --exhaustive-search-filter $taxonomy.align.exhaustive_search_filter
+
+    ##Profile options
+    ##--pca                            Pseudo count admixture strength []
+    ##--pcb                            Pseudo counts: Neff at half of maximum admixture (range 0.0-inf) []
+    --mask-profile $taxonomy.profile.mask_profile
+    --e-profile $taxonomy.profile.e_profile
+    --wg $taxonomy.profile.wg
+    --filter-msa $taxonomy.profile.filter_msa
+    --filter-min-enable $taxonomy.profile.filter_min_enable
+    --max-seq-id $taxonomy.profile.max_seq_id
+    --qid $taxonomy.profile.qid
+    --qsc $taxonomy.profile.qsc
+    --cov $taxonomy.profile.cov
+    --diff $taxonomy.profile.diff
+    --pseudo-cnt-mode $taxonomy.profile.pseudo_cnt_mode
+    --exhaustive-search $taxonomy.profile.exhaustive_search
+    --lca-search $taxonomy.profile.lca_search
+
+    ##Misc options
+    ##--orf-filter INT                 Prefilter query ORFs with non-selective search
+    ##                             Only used during nucleotide-vs-protein classification
+    ##                             NOTE: Consider disabling when classifying short reads [1]
+    --orf-filter-e $taxonomy.misc.orf_filter_e
+    --orf-filter-s $taxonomy.misc.orf_filter_s
+    --lca-mode $taxonomy.misc.lca_mode
+    --tax-output-mode $taxonomy.misc.tax_output_mode
+    --majority $taxonomy.misc.majority
+    --vote-mode $taxonomy.misc.vote_mode
+    ##--lca-ranks STR                  Add column with specified ranks (',' separated) []
+    --tax-lineage $taxonomy.misc.tax_lineage
+    --blacklist $taxonomy.misc.blacklist
+    --taxon-list $taxonomy.misc.taxon_list
+    --rescore-mode $taxonomy.misc.rescore_mode
+    --allow-deletion $taxonomy.misc.allow_deletion
+    --min-length $taxonomy.misc.min_length
+    --max-length $taxonomy.misc.max_length
+    --max-gaps $taxonomy.misc.max_gaps
+    --contig-start-mode $taxonomy.misc.contig_start_mode
+    --contig-end-mode $taxonomy.misc.contig_end_mode
+    --orf-start-mode $taxonomy.misc.orf_start_mode
+    --forward-frames $taxonomy.misc.forward_frames
+    --reverse-frames $taxonomy.misc.reverse_frames
+    --translation-table $taxonomy.misc.translation_table
+    --translate $taxonomy.misc.translate
+    --use-all-table-starts $taxonomy.misc.use_all_table_starts
+    --id-offset $taxonomy.misc.id_offset
+    --add-orf-stop $taxonomy.misc.add_orf_stop
+    --sequence-overlap $taxonomy.misc.sequence_overlap
+    --sequence-split-mode $taxonomy.misc.sequence_split_mode
+    --headers-split-mode $taxonomy.misc.headers_split_mode
+    --search-type $createtaxdb.database_type.search_type
+    --prefilter-mode $taxonomy.misc.prefilter_mode
+
+    ##Common options
+    ##--compressed INT                 Write compressed output [0]
+    --threads "\${GALAXY_SLOTS:-1}"
+    ##-v INT                           Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]
+    --max-seq-len $taxonomy.common.max_seq_len
+    ##--db-load-mode INT               Database preload mode 0: auto, 1: fread, 2: mmap, 3: mmap+touch [0]
+    ##--mpi-runner STR                 Use MPI on compute cluster with this MPI command (e.g. "mpirun -np 42") []
+    ##--force-reuse BOOL               Reuse tmp filse in tmp/latest folder ignoring parameters and version changes [0]
+    ##--remove-tmp-files BOOL          Delete temporary files [0]
+
+    ##Expert options
+    --filter-hits $taxonomy.expert.filter_hits
+    --sort-results $taxonomy.expert.sort_results
+    ##--create-lookup INT              Create database lookup file (can be very large) [0]
+    --chain-alignments $taxonomy.expert.chain_alignments
+    --merge-query $taxonomy.expert.merge_query
+    ##--strand INT                     Strand selection only works for DNA/DNA search 0: reverse, 1: forward, 2: both [1]
+&&
+mmseqs createtsv
+    'sequenceDB'
+    'output_taxonomy'
+    'taxo_result.tsv'
+
+    --first-seq-as-repr $createtsv.first_seq_as_repr
+    --target-column $createtsv.target_column
+    --full-header $createtsv.full_header
+    --idx-seq-src $createtsv.idx_seq_src
+    --threads "\${GALAXY_SLOTS:-1}"
+    ##--compressed INT          Write compressed output [0]
+    ##-v INT                    Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]
+    ##--db-output BOOL          Return a result DB instead of a text file [0]
+
+#if str($kraken_report.keep_report) == "Yes"
+&&
+    mmseqs taxonomyreport
+    #if $filtertaxseqdb.taxon_list
+        'database_filtered'
+    #else 
+        'database'
+    #end if
+        'output_taxonomy'
+        'taxo_result.txt'
+        --report-mode 0
+        --threads "\${GALAXY_SLOTS:-1}"
+#end if
+#if str($krona_report.keep_report) == "Yes"
+&&
+    mmseqs taxonomyreport
+    #if $filtertaxseqdb.taxon_list
+        'database_filtered'
+    #else 
+        'database'
+    #end if
+        'output_taxonomy'
+        'taxo_result.html'
+        --report-mode 1
+        --threads "\${GALAXY_SLOTS:-1}"
+#end if
+    ##-v INT              Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3]
+
+    ]]></command>
+    <inputs>
+        <section name="createdb" title="Convert FASTA/Q file(s) to MMseqs sequence DB format"  expanded="true">
+            <param name="input_fasta" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input fasta file" help="" />
+            <conditional name="alph_type">
+                <param argument="--dbtype" type="select" label="Input type" help="" >
+                    <option value="0" selected="true">Auto</option>
+                    <option value="1">Amino acid</option>
+                    <option value="2">Nucleotides</option>
+                </param>
+                <when value="0"/>
+                <when value="1">
+                    <param argument="--comp-bias-corr-scale" type="float" min="0" max="1" value="1" label="Scale composition bias correction" help=""/>
+                </when>
+                <when value="2">
+                    <param argument="--zdrop" type="integer" min="0" value="40" label="Maximal allowed difference between score values before alignment is truncated" help=""/>
+                </when>
+            </conditional>
+            <param argument="--shuffle" type="boolean" checked="true" label="Shuffle input database" truevalue="1" falsevalue="0" optional="true" help="" />
+        </section>
+        <section name="createtaxdb" title="Add taxonomic labels to reference sequence DB" expanded="true">
+            <conditional name="database_type">
+                <param name="type" type="select" label="Database type" help="" >
+                    <option value="amino_acid_tax" selected="true">Amino acid with taxonomy information</option>
+                    <option value="nucleotides_tax">Nucleotides with taxonomy information</option>
+                    <option value="amino_acid">Amino acid without taxonomy information</option>
+                    <option value="nucleotides">Nucleotides without taxonomy information</option>
+                </param>
+                <when value="amino_acid_tax">
+                    <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases">
+                        <options from_data_table="mmseqs2_databases">
+                            <filter type="static_value" value="aminoacid" column="type"/>
+                            <filter type="static_value" value="yes" column="taxonomy"/>
+                            <validator message="No mmseqs2 database is available" type="no_options"/>
+                        </options>
+                    </param>
+                    <expand macro="search_type_aa" />
+                </when>
+                <when value="nucleotides_tax">
+                    <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases">
+                        <options from_data_table="mmseqs2_databases">
+                            <filter type="static_value" value="nucleotide" column="type"/>
+                            <filter type="static_value" value="yes" column="taxonomy"/>
+                            <validator message="No mmseqs2 database is available" type="no_options"/>
+                        </options>
+                    </param>
+                    <expand macro="search_type_nt" />
+                </when>
+                <when value="amino_acid">
+                    <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases">
+                        <options from_data_table="mmseqs2_databases">
+                            <filter type="static_value" value="aminoacid" column="type"/>
+                            <filter type="static_value" value="no" column="taxonomy"/>
+                            <validator message="No mmseqs2 database is available" type="no_options"/>
+                        </options>
+                    </param>
+                    <expand macro="search_type_aa" />
+                </when>
+                <when value="nucleotides">
+                    <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases">
+                        <options from_data_table="mmseqs2_databases">
+                            <filter type="static_value" value="nucleotide" column="type"/>
+                            <filter type="static_value" value="no" column="taxonomy"/>
+                            <validator message="No mmseqs2 database is available" type="no_options"/>
+                        </options>
+                    </param>
+                    <expand macro="search_type_nt" />
+                </when>
+            </conditional>
+            <param argument="--tax-mapping-file" type="data" format="tabular,tsv,txt" label="File to map sequence identifier to taxonomical identifier" optional="true"/>
+            <param argument="--tax-mapping-mode" type="select" label="Map taxonomy based on sequence database" help="" >
+                <option value="0" selected="true">0: .lookup file</option>
+                <option value="1">1: .source file</option>
+            </param>
+        </section>
+        <section name="filtertaxseqdb" title="Filter taxonomy sequence database">
+            <param argument="--taxon-list" type="text" optional="true" value="" label="Taxonomy ID" help="Possibly multiple values separated by ','"/>
+        </section>
+        <section name="taxonomy" title="Taxonomy assignment by computing the lowest common ancestor of homologs">
+            <section name="prefilter" title="Pre-filter">
+                <expand macro="prefilter_common_parameters" />
+                <param argument="--spaced-kmer-mode" type="select" label="Spaced k-mer mode" help="">
+                    <option value="0">Use consecutive positions in k-mers</option>
+                    <option value="1" selected="true">Use spaced k-mers</option>
+                </param>
+                <param argument="--min-ungapped-score" type="integer" min="0" value="15" label="Accept only matches with ungapped alignment score above threshold" help=""/>
+                <param argument="-s" name="sensitivity" type="float" min="0" max="7.5" value="2" label="Sensitivity" help="1.0 faster; 4.0 fast; 7.5 sensitive"/>
+                <param argument="--target-search-mode" type="select" label="Target search mode" help="" >
+                    <option value="0" selected="true">Regular k-mer</option>
+                    <option value="1">Similar k-mer</option>
+                </param>
+                <param argument="--max-seqs" type="integer" min="0" value="300" label="Maximum results per query sequence allowed to pass the prefilter" help="Affects sensitivity"/>
+                <param argument="--split" type="integer" min="0" value="0" label="Split input into N equally distributed chunks" help="0: set the best split automatically"/>            
+                <param argument="--split-mode" type="select" label="Split mode" help="" >
+                    <option value="0">Split target db</option>
+                    <option value="1">Split query db</option>
+                    <option value="2" selected="true">Auto, depending on main memory</option>
+                </param>
+                <param argument="--diag-score" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Use ungapped diagonal scoring during prefilter" help=""/>
+                <param argument="--exact-kmer-matching" type="integer" min="0" max="1" value="0" label="Extract only exact k-mers for matching" help=""/>
+            </section>
+            <section name="align" title="Align">
+                <expand macro="align_common_parameters" />
+                <param argument="--alignment-mode" type="select" label="Alignment mode : How to compute the alignment" help="" >
+                    <option value="0">Automatic</option>
+                    <option value="1" selected="true">Only score and end_pos</option>
+                    <option value="2">Also start_pos and cov</option>
+                    <option value="3">Also seq.id</option>
+                    <option value="4">Only ungapped alignment</option>
+                </param>
+                <param argument="-e" name="evalue" type="float" min="0" value="1" label="E-value threshold" help="List matches below this E-value"/>
+                <param argument="--min-seq-id" type="float" min="0" max="1" value="0" label="Minimum sequence identity" help="List matches above this sequence identity for clustering"/>
+                <param argument="-c" name="cov" type="float" min="0" value="0" label="List matches above this fraction of aligned (covered) residues" help=""/>
+                <param argument="--cov-mode" type="select" label="Coverage mode" help="" >
+                    <option value="0" selected="true">Coverage of query and target</option>
+                    <option value="1">Coverage of target</option>
+                    <option value="2">Coverage of query</option>
+                    <option value="3">Target seq. length has to be at least x% of query length</option>
+                    <option value="4">Query seq. length has to be at least x% of target length</option>
+                    <option value="5">Short seq. needs to be at least x% of the other seq. length</option>
+                </param>
+                <param argument="--max-rejected" type="integer" min="0" value="5" label="Maximum rejected alignments before alignment calculation for a query is stopped" help=""/>
+                <param argument="--max-accept" type="integer" min="0" value="30" label="Maximum accepted alignments before alignment calculation for a query is stopped" help=""/>
+                <param argument="--exhaustive-search-filter" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Filter result during search ?" help=""/>
+            </section>
+            <section name="profile" title="Profile">
+                <param argument="--mask-profile" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Mask query sequence of profile using tantan" help=""/>
+                <param argument="--e-profile" type="float" min="0" value="1e-03" label="Include sequences matches with inf E-value threshold into the profile" help=""/>
+                <param argument="--wg" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use global sequence weighting for profile calculation" help=""/>
+                <param argument="--filter-msa" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Filter MSA" help=""/>
+                <param argument="--filter-min-enable" type="integer" min="0" value="0" label="Only filter MSAs with more than N sequences, 0 always filters" help=""/>
+                <param argument="--max-seq-id" type="float" min="0" max="1" value="0.9" label="Reduce redundancy of output MSA using max. pairwise sequence identity" help=""/>
+                <param argument="--qid" type="text" value="0" label="Reduce diversity of output MSAs using min.seq. identity with query sequences [0.0,1.0]" help="Alternatively, can be a list of multiple thresholds:
+                                  E.g.: 0.15,0.30,0.50 to defines filter buckets of ]0.15-0.30] and ]0.30-0.50]"/>
+                <param argument="--qsc" type="float" min="-50" max="100" value="-20" label="Reduce diversity of output MSAs using min. score per aligned residue with query sequences" help=""/>
+                <param argument="--cov" type="float" min="0" max="1" value="0" label="Filter output MSAs using min. fraction of query residues covered by matched sequences" help=""/>
+                <param argument="--diff" type="integer" min="0" value="1000" label="Filter MSAs by selecting most diverse set of sequences, keeping at least this many seqs in each MSA block of length 50" help=""/>
+                <param argument="--pseudo-cnt-mode" type="select" label="Pseudo count mode" help="" >
+                    <option value="0" selected="true">Substitution-matrix</option>
+                    <option value="1">Context-specific pseudocounts</option>
+                </param>
+                <param argument="--exhaustive-search" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Exhaustive search" help=""/>
+                <param argument="--lca-search" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Efficient search for LCA candidates" help=""/>
+            </section>
+            <section name="misc" title="Misc">
+                <param argument="--orf-filter-e" type="float" min="0" value="1.000E+02" label="E-value threshold used for query ORF prefiltering" help=""/>
+                <param argument="--orf-filter-s" type="float" min="0" value="2" label="Sensitivity used for query ORF prefiltering" help=""/>
+                <param argument="--lca-mode" type="select" label="LCA mode" help="" >
+                    <option value="1">Single search LCA</option>
+                    <option value="3" selected="true">Approximate 2bLCA</option>
+                    <option value="4">Top hit</option>
+                </param>
+                <param argument="--tax-output-mode" type="select" label="Taxonomy output mode" help="" >
+                    <option value="0" selected="true">Output LCA</option>
+                    <option value="1">Output alignment</option>
+                    <option value="2">Output both</option>
+                </param>
+                <param argument="--majority" type="float" min="0" value="0.5" label="Minimal fraction of agreement among taxonomically assigned sequences of a set" help=""/>
+                <param argument="--vote-mode" type="select" label="Mode of assigning weights to compute majority" help="" >
+                    <option value="0">Uniform</option>
+                    <option value="1" selected="true">Minus log E-value</option>
+                    <option value="2">Score</option>
+                </param>
+                <param argument="--tax-lineage" type="select" label="Taxonomy lineage" help="" >
+                    <option value="0" selected="true">Don't show</option>
+                    <option value="1">Add all lineage names</option>
+                    <option value="2">Add all lineage taxids</option>
+                </param>
+                <param argument="--blacklist" type="text" value="" label="Comma separated list of ignored taxa in LCA computation" help=""/>
+                <param argument="--taxon-list" type="text" value="" label="Taxonomy ID, possibly multiple values separated by ','" help=""/>
+                <param argument="--rescore-mode" type="select" label="Rescore diagonals with" help="" >
+                    <option value="0" selected="true">Hamming distance</option>
+                    <option value="1">Local alignment (score only)</option>
+                    <option value="2">Local alignment</option>
+                    <option value="3">Global alignment</option>
+                    <option value="4">Longest alignment fulfilling window quality criterion</option>
+                </param>
+                <param argument="--allow-deletion" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Allow deletions in a MSA" help=""/>
+                <param argument="--min-length" type="integer" min="0" value="30" label="Minimum codon number in open reading frames" help=""/>
+                <param argument="--max-length" type="integer" min="0" value="32734" label="Maximum codon number in open reading frames" help=""/>
+                <param argument="--max-gaps" type="integer" min="0" value="2147483647" label="Maximum number of codons with gaps or unknown residues before an open reading frame is rejected" help=""/>
+                <param argument="--contig-start-mode" type="select" label="Contig start can be" help="" >
+                    <option value="0">Incomplete</option>
+                    <option value="1">Complete</option>
+                    <option value="2" selected="true">Both</option>
+                </param>
+                <param argument="--contig-end-mode" type="select" label="Contig end can be" help="" >
+                    <option value="0">Incomplete</option>
+                    <option value="1">Complete</option>
+                    <option value="2" selected="true">Both</option>
+                </param>
+                <param argument="--orf-start-mode" type="select" label="ORF fragment can be" help="" >
+                    <option value="0">From start to stop</option>
+                    <option value="1" selected="true">From any to stop</option>
+                    <option value="2">From last encountered start to stop (no start in the middle)</option>
+                </param>
+                <param argument="--forward-frames" type="text" value="1,2,3" label="Comma-separated list of frames on the forward strand to be extracted" help=""/>
+                <param argument="--reverse-frames" type="text" value="1,2,3" label="Comma-separated list of frames on the reverse strand to be extracted" help=""/>
+                <param argument="--translation-table" type="select" label="Translation table" help="">
+                    <option value="1" selected="true">Canonical</option>
+                    <option value="2">The Vertebrate Mitochondrial Code</option>
+                    <option value="3">The Yeast Mitochondrial Code</option>
+                    <option value="4">The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
+                    <option value="5">The Invertebrate Mitochondrial Code</option>
+                    <option value="6">The Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
+                    <option value="9">The Echinoderm and Flatworm Mitochondrial Code</option>
+                    <option value="10">The Euplotid Nuclear Code</option>
+                    <option value="11">The Bacterial, Archaeal and Plant Plastid Code</option>
+                    <option value="12">The Alternative Yeast Nuclear Code</option>
+                    <option value="13">The Ascidian Mitochondrial Code</option>
+                    <option value="14">The Alternative Flatworm Mitochondrial Code</option>
+                    <option value="15">Blepharisma Nuclear Code</option>
+                    <option value="16">Chlorophycean Mitochondrial Code</option>
+                    <option value="21">Trematode Mitochondrial Code</option>
+                    <option value="22">Scenedesmus obliquus Mitochondrial Code</option>
+                    <option value="23">Thraustochytrium Mitochondrial Code</option>
+                    <option value="24">Rhabdopleuridae Mitochondrial Code</option>
+                    <option value="25">Candidate Division SR1 and Gracilibacteria Code</option>
+                    <option value="26">Pachysolen tannophilus Nuclear Code</option>
+                    <option value="27">Karyorelict Nuclear Code</option>
+                    <option value="28">Condylostoma Nuclear Code</option>
+                    <option value="29">Mesodinium Nuclear Code</option>
+                    <option value="30">Peritrich Nuclear Code</option>
+                    <option value="31">Blastocrithidia Nuclear Code</option>
+                </param>
+                <param argument="--translate" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Translate ORF to amino acid" help=""/>
+                <param argument="--use-all-table-starts" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use all alternatives for a start codon in the genetic table, if false - only ATG (AUG)" help=""/>
+                <param argument="--id-offset" type="integer" min="0" value="0" label="Numeric ids in index file are offset by this value" help=""/>
+                <param argument="--add-orf-stop" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Add stop codon '*' at complete start and end" help=""/>
+                <param argument="--sequence-overlap" type="integer" min="0" value="0" label="Overlap between sequences" help=""/>
+                <param argument="--sequence-split-mode" type="select" label="Sequence split mode" help="" >
+                    <option value="0">Copy data</option>
+                    <option value="1" selected="true">Soft link data and write new index</option>
+                </param>
+                <param argument="--headers-split-mode" type="select" label="Headers split mode" help="" >
+                    <option value="0" selected="true">Split position</option>
+                    <option value="1">Original header</option>
+                </param>
+                <param argument="--prefilter-mode" type="select" label="Prefilter mode" help="" >
+                    <option value="0" selected="true">Kmer/ungapped</option>
+                    <option value="1">Ungapped</option>
+                    <option value="2">No filter</option>
+                </param>
+            </section>
+            <expand macro="common_section"/>
+            <section name="expert" title="Expert">
+                <expand macro="expert_common_parameters" />
+                <param argument="--chain-alignments" type="integer" min="0" value="0" label="Chain alignments" help=""/>
+                <param argument="--merge-query" type="integer" min="0" value="1" label="Combine ORFs/split sequences to a single entry" help=""/>
+            </section>
+        </section>
+        <section name="createtsv" title="Create a tsv report from taxonomy output ">
+            <param argument="--first-seq-as-repr" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use the first sequence of the clustering result as representative sequence" help=""/>
+            <param argument="--target-column" type="integer" min="0" value="1" label="Select a target column, 0 if no target id exists" help="" />
+            <param argument="--full-header" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Replace DB ID by its corresponding Full Header" help=""/>
+            <param argument="--idx-seq-src" type="select" label="Index sequences source" help="">
+                <option value="0" selected="true">Auto</option>
+                <option value="1">Split/translated sequences</option>
+                <option value="2">Input sequences</option>
+            </param>
+        </section>
+        <conditional name="kraken_report">
+            <param name="keep_report" type="select" label="Do you want a Kraken style report" help="" >
+                <option value="Yes" selected="true">Yes</option>
+                <option value="No">No</option>
+            </param>
+            <when value="Yes"/>
+            <when value="No"/>
+        </conditional>
+                <conditional name="krona_report">
+            <param name="keep_report" type="select" label="Do you want a Krona style report" help="" >
+                <option value="Yes" selected="true">Yes</option>
+                <option value="No">No</option>
+            </param>
+            <when value="Yes"/>
+            <when value="No"/>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output_taxonomy_tsv" format="tabular" from_work_dir="taxo_result.tsv" label="${tool.name} on ${on_string}: Taxonomy Report"/>
+        <data name="output_taxonomy_kraken" format="txt" from_work_dir="taxo_result.txt" label="${tool.name} on ${on_string}: Kraken Report">
+            <filter>kraken_report['keep_report'] == "Yes"</filter>
+        </data>
+        <data name="output_taxonomy_krona" format="html" from_work_dir="taxo_result.html" label="${tool.name} on ${on_string}: Krona Report">
+            <filter>krona_report['keep_report'] == "Yes"</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Test with Kraken report -->
+        <test expect_num_outputs="2">
+            <section name="createdb">
+                <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/>
+                <conditional name="alph_type">
+                    <param name="dbtype" value="2"/>
+                </conditional>
+            </section>
+            <section name="createtaxdb">
+                <conditional name="database_type">
+                    <param name="type" value="amino_acid_tax"/>
+                    <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" />
+                </conditional>
+            </section>
+            <section name="filtertaxseqdb">
+                <param name="taxon_list" value="2" />
+            </section>
+            <conditional name="krona_report">
+                <param name="keep_report" value="No"/>
+            </conditional>
+            <output name="output_taxonomy_tsv" ftype="tabular">
+                <assert_contents>
+                    <has_line line="MYSTERY.222&#009;1236&#009;class&#009;Gammaproteobacteria&#009;1&#009;1&#009;1&#009;1.000"/>
+                    <has_line line="MYSTERY.64&#009;119060&#009;family&#009;Burkholderiaceae&#009;1&#009;1&#009;1&#009;1.000"/>
+                    <has_n_columns n="8"/>
+                </assert_contents>
+            </output>
+            <output name="output_taxonomy_kraken" ftype="txt">
+                <assert_contents>
+                    <has_text text="93.3333"/>
+                    <has_text text="33.3333"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <section name="createdb">
+                <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/>
+            </section>
+            <section name="createtaxdb">
+                <conditional name="database_type">
+                    <param name="type" value="amino_acid_tax"/>
+                    <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" />
+                </conditional>
+            </section>
+            <conditional name="kraken_report">
+                <param name="keep_report" value="No"/>
+            </conditional>
+            <output name="output_taxonomy_tsv" ftype="tabular">
+                <assert_contents>
+                    <has_line line="MYSTERY.222&#009;1236&#009;class&#009;Gammaproteobacteria&#009;1&#009;1&#009;1&#009;1.000"/>
+                    <has_line line="MYSTERY.64&#009;119060&#009;family&#009;Burkholderiaceae&#009;1&#009;1&#009;1&#009;1.000"/>
+                    <has_n_columns n="8"/>
+                </assert_contents>
+            </output>
+            <output name="output_taxonomy_krona" ftype="html">
+                <assert_contents>
+                    <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="3">
+            <section name="createdb">
+                <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/>
+            </section>
+            <section name="createtaxdb">
+                <conditional name="database_type">
+                    <param name="type" value="amino_acid_tax"/>
+                    <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" />
+                </conditional>
+            </section>
+            <output name="output_taxonomy_tsv" ftype="tabular">
+                <assert_contents>
+                    <has_line line="MYSTERY.222&#009;1236&#009;class&#009;Gammaproteobacteria&#009;1&#009;1&#009;1&#009;1.000"/>
+                    <has_line line="MYSTERY.64&#009;119060&#009;family&#009;Burkholderiaceae&#009;1&#009;1&#009;1&#009;1.000"/>
+                    <has_n_columns n="8"/>
+                </assert_contents>
+            </output>
+            <output name="output_taxonomy_krona" ftype="html">
+                <assert_contents>
+                    <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/>
+                </assert_contents>
+            </output>
+            <output name="output_taxonomy_kraken" ftype="txt">
+                <assert_contents>
+                    <has_text text="93.3333"/>
+                    <has_text text="33.3333"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**MMseqs2: ultra fast and sensitive sequence search and clustering suite**
+
+MMseqs2 (Many-against-Many sequence searching) is a software suite to search and cluster huge protein and nucleotide sequence sets. 
+MMseqs2 is open source GPL-licensed software implemented in C++ for Linux, MacOS, and (as beta version, via cygwin) Windows. 
+The software is designed to run on multiple cores and servers and exhibits very good scalability. 
+MMseqs2 can run 10000 times faster than BLAST. At 100 times its speed it achieves almost the same sensitivity. 
+It can perform profile searches with the same sensitivity as PSI-BLAST at over 400 times its speed.
+
+**Usage** 
+
+* Convert FASTA/Q file(s) to MMseqs sequence DB format
+    *mmseqs createdb <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB> [options]*
+
+* Add taxonomic labels to sequence DB
+    *mmseqs createtaxdb <i:sequenceDB> <tmpDir> [options]*
+
+* Filter taxonomy sequence database
+    *mmseqs filtertaxseqdb <i:taxSeqDB> <o:taxSeqDB> [options]*
+
+* Taxonomy assignment by computing the lowest common ancestor of homologs
+    *mmseqs taxonomy <i:queryDB> <i:targetDB> <o:taxaDB> <tmpDir> [options]*
+
+* Convert result DB to tab-separated flat file
+    *mmseqs createtsv <i:queryDB> [<i:targetDB>] <i:resultDB> <o:tsvFile> [options]*
+
+* Create a taxonomy report in Kraken or Krona format
+    *mmseqs taxonomyreport <i:seqTaxDB> <i:taxResultDB/resultDB/sequenceDB> <o:taxonomyReport> [options]*
+
+https://github.com/soedinglab/MMseqs2
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file
author	iuc
date	Thu, 27 Mar 2025 16:42:24 +0000
parents
children	96b969571420