Mercurial > repos > iuc > mmseqs2_taxonomy_assignment
diff mmseqs2_taxonomy_assignment.xml @ 0:dbd5e2791f49 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mmsesq2 commit 25b603890e8048ac7bbffb81eaaa440f87049521
| author | iuc |
|---|---|
| date | Thu, 27 Mar 2025 16:42:24 +0000 |
| parents | |
| children | 96b969571420 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mmseqs2_taxonomy_assignment.xml Thu Mar 27 16:42:24 2025 +0000 @@ -0,0 +1,618 @@ +<tool id="mmseqs2_taxonomy_assignment" name="MMseqs2 Taxonomy Assignments" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description> + of sequences by comparing them to a reference database + </description> + <macros> + <import>macro.xml</import> + </macros> + <expand macro="biotools"/> + <expand macro="requirements"/> + <expand macro="version_command"/> + <command detect_errors="exit_code"><![CDATA[ +ln -s -f '${createdb.input_fasta}' 'input' && +mmseqs createdb + 'input' + 'sequenceDB' + --dbtype '$createdb.alph_type.dbtype' + --shuffle $createdb.shuffle && + +cp -r '$createtaxdb.database_type.mmseqs2_db_select.fields.path'/database* . && + +mmseqs createtaxdb + database + 'tmp' + #if $createtaxdb.tax_mapping_file + --tax-mapping-file '$createtaxdb.tax_mapping_file' + #end if + --tax-mapping-mode '$createtaxdb.tax_mapping_mode' + --threads "\${GALAXY_SLOTS:-1}" && + +#if $filtertaxseqdb.taxon_list + mmseqs filtertaxseqdb + 'database' + 'database_filtered' + --taxon-list '$filtertaxseqdb.taxon_list' + && +#end if + +mmseqs taxonomy + 'sequenceDB' + #if $filtertaxseqdb.taxon_list + 'database_filtered' + #else + 'database' + #end if + 'output_taxonomy' + 'tmp' + #if str($createdb.alph_type.dbtype) == "1" + --comp-bias-corr-scale $createdb.alph_type.comp_bias_corr_scale + #elif str($createdb.alph_type.dbtype) == "2" + --zdrop $createdb.alph_type.zdrop + #end if + ##Pre-filter options + --add-self-matches $taxonomy.prefilter.add_self_matches + -s $taxonomy.prefilter.sensitivity + -k $taxonomy.prefilter.kmer_length + --target-search-mode $taxonomy.prefilter.target_search_mode + ##--k-score TWIN k-mer threshold for generating similar k-mer lists [seq:2147483647,prof:2147483647] + --max-seqs $taxonomy.prefilter.max_seqs + --split $taxonomy.prefilter.split + --split-mode $taxonomy.prefilter.split_mode + ##--split-memory-limit BYTE Set max memory per split. E.g. 800B, 5K, 10M, 1G. Default (0) to all available system memory [0] + --diag-score $taxonomy.prefilter.diag_score + --exact-kmer-matching $taxonomy.prefilter.exact_kmer_matching + --mask $taxonomy.prefilter.mask + --mask-prob $taxonomy.prefilter.mask_prob + --mask-lower-case $taxonomy.prefilter.mask_lower_case + --min-ungapped-score $taxonomy.prefilter.min_ungapped_score + --spaced-kmer-mode $taxonomy.prefilter.spaced_kmer_mode + ##--spaced-kmer-pattern STR User-specified spaced k-mer pattern [] + ##--local-tmp STR Path where some of the temporary files will be created [] + ##--disk-space-limit BYTE Set max disk space to use for reverse profile searches. E.g. 800B, 5K, 10M, 1G. Default (0) to all available disk space in the temp folder [0] + + ##Align options + -a $taxonomy.align.convertalis + ##The next 2 parameters seems to be the same + --alignment-mode $taxonomy.align.alignment_mode + --alignment-output-mode $taxonomy.align.alignment_output_mode + --wrapped-scoring $taxonomy.align.wrapped_scoring + -e $taxonomy.align.evalue + --min-seq-id $taxonomy.align.min_seq_id + --min-aln-len $taxonomy.align.min_aln_len + --seq-id-mode $taxonomy.align.seq_id_mode + --alt-ali $taxonomy.align.alt_ali + -c $taxonomy.align.cov + --cov-mode $taxonomy.align.cov_mode + --max-rejected $taxonomy.align.max_rejected + --max-accept $taxonomy.align.max_accept + --score-bias $taxonomy.align.score_bias + --realign $taxonomy.align.realign + --realign-score-bias $taxonomy.align.realign_score_bias + --realign-max-seqs $taxonomy.align.realign_max_seqs + --corr-score-weight $taxonomy.align.corr_score_weight + --exhaustive-search-filter $taxonomy.align.exhaustive_search_filter + + ##Profile options + ##--pca Pseudo count admixture strength [] + ##--pcb Pseudo counts: Neff at half of maximum admixture (range 0.0-inf) [] + --mask-profile $taxonomy.profile.mask_profile + --e-profile $taxonomy.profile.e_profile + --wg $taxonomy.profile.wg + --filter-msa $taxonomy.profile.filter_msa + --filter-min-enable $taxonomy.profile.filter_min_enable + --max-seq-id $taxonomy.profile.max_seq_id + --qid $taxonomy.profile.qid + --qsc $taxonomy.profile.qsc + --cov $taxonomy.profile.cov + --diff $taxonomy.profile.diff + --pseudo-cnt-mode $taxonomy.profile.pseudo_cnt_mode + --exhaustive-search $taxonomy.profile.exhaustive_search + --lca-search $taxonomy.profile.lca_search + + ##Misc options + ##--orf-filter INT Prefilter query ORFs with non-selective search + ## Only used during nucleotide-vs-protein classification + ## NOTE: Consider disabling when classifying short reads [1] + --orf-filter-e $taxonomy.misc.orf_filter_e + --orf-filter-s $taxonomy.misc.orf_filter_s + --lca-mode $taxonomy.misc.lca_mode + --tax-output-mode $taxonomy.misc.tax_output_mode + --majority $taxonomy.misc.majority + --vote-mode $taxonomy.misc.vote_mode + ##--lca-ranks STR Add column with specified ranks (',' separated) [] + --tax-lineage $taxonomy.misc.tax_lineage + --blacklist $taxonomy.misc.blacklist + --taxon-list $taxonomy.misc.taxon_list + --rescore-mode $taxonomy.misc.rescore_mode + --allow-deletion $taxonomy.misc.allow_deletion + --min-length $taxonomy.misc.min_length + --max-length $taxonomy.misc.max_length + --max-gaps $taxonomy.misc.max_gaps + --contig-start-mode $taxonomy.misc.contig_start_mode + --contig-end-mode $taxonomy.misc.contig_end_mode + --orf-start-mode $taxonomy.misc.orf_start_mode + --forward-frames $taxonomy.misc.forward_frames + --reverse-frames $taxonomy.misc.reverse_frames + --translation-table $taxonomy.misc.translation_table + --translate $taxonomy.misc.translate + --use-all-table-starts $taxonomy.misc.use_all_table_starts + --id-offset $taxonomy.misc.id_offset + --add-orf-stop $taxonomy.misc.add_orf_stop + --sequence-overlap $taxonomy.misc.sequence_overlap + --sequence-split-mode $taxonomy.misc.sequence_split_mode + --headers-split-mode $taxonomy.misc.headers_split_mode + --search-type $createtaxdb.database_type.search_type + --prefilter-mode $taxonomy.misc.prefilter_mode + + ##Common options + ##--compressed INT Write compressed output [0] + --threads "\${GALAXY_SLOTS:-1}" + ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3] + --max-seq-len $taxonomy.common.max_seq_len + ##--db-load-mode INT Database preload mode 0: auto, 1: fread, 2: mmap, 3: mmap+touch [0] + ##--mpi-runner STR Use MPI on compute cluster with this MPI command (e.g. "mpirun -np 42") [] + ##--force-reuse BOOL Reuse tmp filse in tmp/latest folder ignoring parameters and version changes [0] + ##--remove-tmp-files BOOL Delete temporary files [0] + + ##Expert options + --filter-hits $taxonomy.expert.filter_hits + --sort-results $taxonomy.expert.sort_results + ##--create-lookup INT Create database lookup file (can be very large) [0] + --chain-alignments $taxonomy.expert.chain_alignments + --merge-query $taxonomy.expert.merge_query + ##--strand INT Strand selection only works for DNA/DNA search 0: reverse, 1: forward, 2: both [1] +&& +mmseqs createtsv + 'sequenceDB' + 'output_taxonomy' + 'taxo_result.tsv' + + --first-seq-as-repr $createtsv.first_seq_as_repr + --target-column $createtsv.target_column + --full-header $createtsv.full_header + --idx-seq-src $createtsv.idx_seq_src + --threads "\${GALAXY_SLOTS:-1}" + ##--compressed INT Write compressed output [0] + ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3] + ##--db-output BOOL Return a result DB instead of a text file [0] + +#if str($kraken_report.keep_report) == "Yes" +&& + mmseqs taxonomyreport + #if $filtertaxseqdb.taxon_list + 'database_filtered' + #else + 'database' + #end if + 'output_taxonomy' + 'taxo_result.txt' + --report-mode 0 + --threads "\${GALAXY_SLOTS:-1}" +#end if +#if str($krona_report.keep_report) == "Yes" +&& + mmseqs taxonomyreport + #if $filtertaxseqdb.taxon_list + 'database_filtered' + #else + 'database' + #end if + 'output_taxonomy' + 'taxo_result.html' + --report-mode 1 + --threads "\${GALAXY_SLOTS:-1}" +#end if + ##-v INT Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info [3] + + ]]></command> + <inputs> + <section name="createdb" title="Convert FASTA/Q file(s) to MMseqs sequence DB format" expanded="true"> + <param name="input_fasta" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input fasta file" help="" /> + <conditional name="alph_type"> + <param argument="--dbtype" type="select" label="Input type" help="" > + <option value="0" selected="true">Auto</option> + <option value="1">Amino acid</option> + <option value="2">Nucleotides</option> + </param> + <when value="0"/> + <when value="1"> + <param argument="--comp-bias-corr-scale" type="float" min="0" max="1" value="1" label="Scale composition bias correction" help=""/> + </when> + <when value="2"> + <param argument="--zdrop" type="integer" min="0" value="40" label="Maximal allowed difference between score values before alignment is truncated" help=""/> + </when> + </conditional> + <param argument="--shuffle" type="boolean" checked="true" label="Shuffle input database" truevalue="1" falsevalue="0" optional="true" help="" /> + </section> + <section name="createtaxdb" title="Add taxonomic labels to reference sequence DB" expanded="true"> + <conditional name="database_type"> + <param name="type" type="select" label="Database type" help="" > + <option value="amino_acid_tax" selected="true">Amino acid with taxonomy information</option> + <option value="nucleotides_tax">Nucleotides with taxonomy information</option> + <option value="amino_acid">Amino acid without taxonomy information</option> + <option value="nucleotides">Nucleotides without taxonomy information</option> + </param> + <when value="amino_acid_tax"> + <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> + <options from_data_table="mmseqs2_databases"> + <filter type="static_value" value="aminoacid" column="type"/> + <filter type="static_value" value="yes" column="taxonomy"/> + <validator message="No mmseqs2 database is available" type="no_options"/> + </options> + </param> + <expand macro="search_type_aa" /> + </when> + <when value="nucleotides_tax"> + <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> + <options from_data_table="mmseqs2_databases"> + <filter type="static_value" value="nucleotide" column="type"/> + <filter type="static_value" value="yes" column="taxonomy"/> + <validator message="No mmseqs2 database is available" type="no_options"/> + </options> + </param> + <expand macro="search_type_nt" /> + </when> + <when value="amino_acid"> + <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> + <options from_data_table="mmseqs2_databases"> + <filter type="static_value" value="aminoacid" column="type"/> + <filter type="static_value" value="no" column="taxonomy"/> + <validator message="No mmseqs2 database is available" type="no_options"/> + </options> + </param> + <expand macro="search_type_aa" /> + </when> + <when value="nucleotides"> + <param name="mmseqs2_db_select" type="select" label="MMseqs2 databases"> + <options from_data_table="mmseqs2_databases"> + <filter type="static_value" value="nucleotide" column="type"/> + <filter type="static_value" value="no" column="taxonomy"/> + <validator message="No mmseqs2 database is available" type="no_options"/> + </options> + </param> + <expand macro="search_type_nt" /> + </when> + </conditional> + <param argument="--tax-mapping-file" type="data" format="tabular,tsv,txt" label="File to map sequence identifier to taxonomical identifier" optional="true"/> + <param argument="--tax-mapping-mode" type="select" label="Map taxonomy based on sequence database" help="" > + <option value="0" selected="true">0: .lookup file</option> + <option value="1">1: .source file</option> + </param> + </section> + <section name="filtertaxseqdb" title="Filter taxonomy sequence database"> + <param argument="--taxon-list" type="text" optional="true" value="" label="Taxonomy ID" help="Possibly multiple values separated by ','"/> + </section> + <section name="taxonomy" title="Taxonomy assignment by computing the lowest common ancestor of homologs"> + <section name="prefilter" title="Pre-filter"> + <expand macro="prefilter_common_parameters" /> + <param argument="--spaced-kmer-mode" type="select" label="Spaced k-mer mode" help=""> + <option value="0">Use consecutive positions in k-mers</option> + <option value="1" selected="true">Use spaced k-mers</option> + </param> + <param argument="--min-ungapped-score" type="integer" min="0" value="15" label="Accept only matches with ungapped alignment score above threshold" help=""/> + <param argument="-s" name="sensitivity" type="float" min="0" max="7.5" value="2" label="Sensitivity" help="1.0 faster; 4.0 fast; 7.5 sensitive"/> + <param argument="--target-search-mode" type="select" label="Target search mode" help="" > + <option value="0" selected="true">Regular k-mer</option> + <option value="1">Similar k-mer</option> + </param> + <param argument="--max-seqs" type="integer" min="0" value="300" label="Maximum results per query sequence allowed to pass the prefilter" help="Affects sensitivity"/> + <param argument="--split" type="integer" min="0" value="0" label="Split input into N equally distributed chunks" help="0: set the best split automatically"/> + <param argument="--split-mode" type="select" label="Split mode" help="" > + <option value="0">Split target db</option> + <option value="1">Split query db</option> + <option value="2" selected="true">Auto, depending on main memory</option> + </param> + <param argument="--diag-score" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Use ungapped diagonal scoring during prefilter" help=""/> + <param argument="--exact-kmer-matching" type="integer" min="0" max="1" value="0" label="Extract only exact k-mers for matching" help=""/> + </section> + <section name="align" title="Align"> + <expand macro="align_common_parameters" /> + <param argument="--alignment-mode" type="select" label="Alignment mode : How to compute the alignment" help="" > + <option value="0">Automatic</option> + <option value="1" selected="true">Only score and end_pos</option> + <option value="2">Also start_pos and cov</option> + <option value="3">Also seq.id</option> + <option value="4">Only ungapped alignment</option> + </param> + <param argument="-e" name="evalue" type="float" min="0" value="1" label="E-value threshold" help="List matches below this E-value"/> + <param argument="--min-seq-id" type="float" min="0" max="1" value="0" label="Minimum sequence identity" help="List matches above this sequence identity for clustering"/> + <param argument="-c" name="cov" type="float" min="0" value="0" label="List matches above this fraction of aligned (covered) residues" help=""/> + <param argument="--cov-mode" type="select" label="Coverage mode" help="" > + <option value="0" selected="true">Coverage of query and target</option> + <option value="1">Coverage of target</option> + <option value="2">Coverage of query</option> + <option value="3">Target seq. length has to be at least x% of query length</option> + <option value="4">Query seq. length has to be at least x% of target length</option> + <option value="5">Short seq. needs to be at least x% of the other seq. length</option> + </param> + <param argument="--max-rejected" type="integer" min="0" value="5" label="Maximum rejected alignments before alignment calculation for a query is stopped" help=""/> + <param argument="--max-accept" type="integer" min="0" value="30" label="Maximum accepted alignments before alignment calculation for a query is stopped" help=""/> + <param argument="--exhaustive-search-filter" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Filter result during search ?" help=""/> + </section> + <section name="profile" title="Profile"> + <param argument="--mask-profile" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Mask query sequence of profile using tantan" help=""/> + <param argument="--e-profile" type="float" min="0" value="1e-03" label="Include sequences matches with inf E-value threshold into the profile" help=""/> + <param argument="--wg" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use global sequence weighting for profile calculation" help=""/> + <param argument="--filter-msa" type="boolean" checked="true" truevalue="1" falsevalue="0" label="Filter MSA" help=""/> + <param argument="--filter-min-enable" type="integer" min="0" value="0" label="Only filter MSAs with more than N sequences, 0 always filters" help=""/> + <param argument="--max-seq-id" type="float" min="0" max="1" value="0.9" label="Reduce redundancy of output MSA using max. pairwise sequence identity" help=""/> + <param argument="--qid" type="text" value="0" label="Reduce diversity of output MSAs using min.seq. identity with query sequences [0.0,1.0]" help="Alternatively, can be a list of multiple thresholds: + E.g.: 0.15,0.30,0.50 to defines filter buckets of ]0.15-0.30] and ]0.30-0.50]"/> + <param argument="--qsc" type="float" min="-50" max="100" value="-20" label="Reduce diversity of output MSAs using min. score per aligned residue with query sequences" help=""/> + <param argument="--cov" type="float" min="0" max="1" value="0" label="Filter output MSAs using min. fraction of query residues covered by matched sequences" help=""/> + <param argument="--diff" type="integer" min="0" value="1000" label="Filter MSAs by selecting most diverse set of sequences, keeping at least this many seqs in each MSA block of length 50" help=""/> + <param argument="--pseudo-cnt-mode" type="select" label="Pseudo count mode" help="" > + <option value="0" selected="true">Substitution-matrix</option> + <option value="1">Context-specific pseudocounts</option> + </param> + <param argument="--exhaustive-search" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Exhaustive search" help=""/> + <param argument="--lca-search" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Efficient search for LCA candidates" help=""/> + </section> + <section name="misc" title="Misc"> + <param argument="--orf-filter-e" type="float" min="0" value="1.000E+02" label="E-value threshold used for query ORF prefiltering" help=""/> + <param argument="--orf-filter-s" type="float" min="0" value="2" label="Sensitivity used for query ORF prefiltering" help=""/> + <param argument="--lca-mode" type="select" label="LCA mode" help="" > + <option value="1">Single search LCA</option> + <option value="3" selected="true">Approximate 2bLCA</option> + <option value="4">Top hit</option> + </param> + <param argument="--tax-output-mode" type="select" label="Taxonomy output mode" help="" > + <option value="0" selected="true">Output LCA</option> + <option value="1">Output alignment</option> + <option value="2">Output both</option> + </param> + <param argument="--majority" type="float" min="0" value="0.5" label="Minimal fraction of agreement among taxonomically assigned sequences of a set" help=""/> + <param argument="--vote-mode" type="select" label="Mode of assigning weights to compute majority" help="" > + <option value="0">Uniform</option> + <option value="1" selected="true">Minus log E-value</option> + <option value="2">Score</option> + </param> + <param argument="--tax-lineage" type="select" label="Taxonomy lineage" help="" > + <option value="0" selected="true">Don't show</option> + <option value="1">Add all lineage names</option> + <option value="2">Add all lineage taxids</option> + </param> + <param argument="--blacklist" type="text" value="" label="Comma separated list of ignored taxa in LCA computation" help=""/> + <param argument="--taxon-list" type="text" value="" label="Taxonomy ID, possibly multiple values separated by ','" help=""/> + <param argument="--rescore-mode" type="select" label="Rescore diagonals with" help="" > + <option value="0" selected="true">Hamming distance</option> + <option value="1">Local alignment (score only)</option> + <option value="2">Local alignment</option> + <option value="3">Global alignment</option> + <option value="4">Longest alignment fulfilling window quality criterion</option> + </param> + <param argument="--allow-deletion" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Allow deletions in a MSA" help=""/> + <param argument="--min-length" type="integer" min="0" value="30" label="Minimum codon number in open reading frames" help=""/> + <param argument="--max-length" type="integer" min="0" value="32734" label="Maximum codon number in open reading frames" help=""/> + <param argument="--max-gaps" type="integer" min="0" value="2147483647" label="Maximum number of codons with gaps or unknown residues before an open reading frame is rejected" help=""/> + <param argument="--contig-start-mode" type="select" label="Contig start can be" help="" > + <option value="0">Incomplete</option> + <option value="1">Complete</option> + <option value="2" selected="true">Both</option> + </param> + <param argument="--contig-end-mode" type="select" label="Contig end can be" help="" > + <option value="0">Incomplete</option> + <option value="1">Complete</option> + <option value="2" selected="true">Both</option> + </param> + <param argument="--orf-start-mode" type="select" label="ORF fragment can be" help="" > + <option value="0">From start to stop</option> + <option value="1" selected="true">From any to stop</option> + <option value="2">From last encountered start to stop (no start in the middle)</option> + </param> + <param argument="--forward-frames" type="text" value="1,2,3" label="Comma-separated list of frames on the forward strand to be extracted" help=""/> + <param argument="--reverse-frames" type="text" value="1,2,3" label="Comma-separated list of frames on the reverse strand to be extracted" help=""/> + <param argument="--translation-table" type="select" label="Translation table" help=""> + <option value="1" selected="true">Canonical</option> + <option value="2">The Vertebrate Mitochondrial Code</option> + <option value="3">The Yeast Mitochondrial Code</option> + <option value="4">The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option> + <option value="5">The Invertebrate Mitochondrial Code</option> + <option value="6">The Ciliate, Dasycladacean and Hexamita Nuclear Code</option> + <option value="9">The Echinoderm and Flatworm Mitochondrial Code</option> + <option value="10">The Euplotid Nuclear Code</option> + <option value="11">The Bacterial, Archaeal and Plant Plastid Code</option> + <option value="12">The Alternative Yeast Nuclear Code</option> + <option value="13">The Ascidian Mitochondrial Code</option> + <option value="14">The Alternative Flatworm Mitochondrial Code</option> + <option value="15">Blepharisma Nuclear Code</option> + <option value="16">Chlorophycean Mitochondrial Code</option> + <option value="21">Trematode Mitochondrial Code</option> + <option value="22">Scenedesmus obliquus Mitochondrial Code</option> + <option value="23">Thraustochytrium Mitochondrial Code</option> + <option value="24">Rhabdopleuridae Mitochondrial Code</option> + <option value="25">Candidate Division SR1 and Gracilibacteria Code</option> + <option value="26">Pachysolen tannophilus Nuclear Code</option> + <option value="27">Karyorelict Nuclear Code</option> + <option value="28">Condylostoma Nuclear Code</option> + <option value="29">Mesodinium Nuclear Code</option> + <option value="30">Peritrich Nuclear Code</option> + <option value="31">Blastocrithidia Nuclear Code</option> + </param> + <param argument="--translate" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Translate ORF to amino acid" help=""/> + <param argument="--use-all-table-starts" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use all alternatives for a start codon in the genetic table, if false - only ATG (AUG)" help=""/> + <param argument="--id-offset" type="integer" min="0" value="0" label="Numeric ids in index file are offset by this value" help=""/> + <param argument="--add-orf-stop" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Add stop codon '*' at complete start and end" help=""/> + <param argument="--sequence-overlap" type="integer" min="0" value="0" label="Overlap between sequences" help=""/> + <param argument="--sequence-split-mode" type="select" label="Sequence split mode" help="" > + <option value="0">Copy data</option> + <option value="1" selected="true">Soft link data and write new index</option> + </param> + <param argument="--headers-split-mode" type="select" label="Headers split mode" help="" > + <option value="0" selected="true">Split position</option> + <option value="1">Original header</option> + </param> + <param argument="--prefilter-mode" type="select" label="Prefilter mode" help="" > + <option value="0" selected="true">Kmer/ungapped</option> + <option value="1">Ungapped</option> + <option value="2">No filter</option> + </param> + </section> + <expand macro="common_section"/> + <section name="expert" title="Expert"> + <expand macro="expert_common_parameters" /> + <param argument="--chain-alignments" type="integer" min="0" value="0" label="Chain alignments" help=""/> + <param argument="--merge-query" type="integer" min="0" value="1" label="Combine ORFs/split sequences to a single entry" help=""/> + </section> + </section> + <section name="createtsv" title="Create a tsv report from taxonomy output "> + <param argument="--first-seq-as-repr" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Use the first sequence of the clustering result as representative sequence" help=""/> + <param argument="--target-column" type="integer" min="0" value="1" label="Select a target column, 0 if no target id exists" help="" /> + <param argument="--full-header" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Replace DB ID by its corresponding Full Header" help=""/> + <param argument="--idx-seq-src" type="select" label="Index sequences source" help=""> + <option value="0" selected="true">Auto</option> + <option value="1">Split/translated sequences</option> + <option value="2">Input sequences</option> + </param> + </section> + <conditional name="kraken_report"> + <param name="keep_report" type="select" label="Do you want a Kraken style report" help="" > + <option value="Yes" selected="true">Yes</option> + <option value="No">No</option> + </param> + <when value="Yes"/> + <when value="No"/> + </conditional> + <conditional name="krona_report"> + <param name="keep_report" type="select" label="Do you want a Krona style report" help="" > + <option value="Yes" selected="true">Yes</option> + <option value="No">No</option> + </param> + <when value="Yes"/> + <when value="No"/> + </conditional> + </inputs> + <outputs> + <data name="output_taxonomy_tsv" format="tabular" from_work_dir="taxo_result.tsv" label="${tool.name} on ${on_string}: Taxonomy Report"/> + <data name="output_taxonomy_kraken" format="txt" from_work_dir="taxo_result.txt" label="${tool.name} on ${on_string}: Kraken Report"> + <filter>kraken_report['keep_report'] == "Yes"</filter> + </data> + <data name="output_taxonomy_krona" format="html" from_work_dir="taxo_result.html" label="${tool.name} on ${on_string}: Krona Report"> + <filter>krona_report['keep_report'] == "Yes"</filter> + </data> + </outputs> + <tests> + <!-- Test with Kraken report --> + <test expect_num_outputs="2"> + <section name="createdb"> + <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> + <conditional name="alph_type"> + <param name="dbtype" value="2"/> + </conditional> + </section> + <section name="createtaxdb"> + <conditional name="database_type"> + <param name="type" value="amino_acid_tax"/> + <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> + </conditional> + </section> + <section name="filtertaxseqdb"> + <param name="taxon_list" value="2" /> + </section> + <conditional name="krona_report"> + <param name="keep_report" value="No"/> + </conditional> + <output name="output_taxonomy_tsv" ftype="tabular"> + <assert_contents> + <has_line line="MYSTERY.222	1236	class	Gammaproteobacteria	1	1	1	1.000"/> + <has_line line="MYSTERY.64	119060	family	Burkholderiaceae	1	1	1	1.000"/> + <has_n_columns n="8"/> + </assert_contents> + </output> + <output name="output_taxonomy_kraken" ftype="txt"> + <assert_contents> + <has_text text="93.3333"/> + <has_text text="33.3333"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <section name="createdb"> + <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> + </section> + <section name="createtaxdb"> + <conditional name="database_type"> + <param name="type" value="amino_acid_tax"/> + <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> + </conditional> + </section> + <conditional name="kraken_report"> + <param name="keep_report" value="No"/> + </conditional> + <output name="output_taxonomy_tsv" ftype="tabular"> + <assert_contents> + <has_line line="MYSTERY.222	1236	class	Gammaproteobacteria	1	1	1	1.000"/> + <has_line line="MYSTERY.64	119060	family	Burkholderiaceae	1	1	1	1.000"/> + <has_n_columns n="8"/> + </assert_contents> + </output> + <output name="output_taxonomy_krona" ftype="html"> + <assert_contents> + <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="3"> + <section name="createdb"> + <param name="input_fasta" value="light_mystery_reads.fasta" ftype="fasta"/> + </section> + <section name="createtaxdb"> + <conditional name="database_type"> + <param name="type" value="amino_acid_tax"/> + <param name="mmseqs2_db_select" value="UniProtKB/Swiss-Prot-15.6f452-10022025" /> + </conditional> + </section> + <output name="output_taxonomy_tsv" ftype="tabular"> + <assert_contents> + <has_line line="MYSTERY.222	1236	class	Gammaproteobacteria	1	1	1	1.000"/> + <has_line line="MYSTERY.64	119060	family	Burkholderiaceae	1	1	1	1.000"/> + <has_n_columns n="8"/> + </assert_contents> + </output> + <output name="output_taxonomy_krona" ftype="html"> + <assert_contents> + <has_line line="// Krona is a flexible tool for exploring the relative proportions of"/> + </assert_contents> + </output> + <output name="output_taxonomy_kraken" ftype="txt"> + <assert_contents> + <has_text text="93.3333"/> + <has_text text="33.3333"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +**MMseqs2: ultra fast and sensitive sequence search and clustering suite** + +MMseqs2 (Many-against-Many sequence searching) is a software suite to search and cluster huge protein and nucleotide sequence sets. +MMseqs2 is open source GPL-licensed software implemented in C++ for Linux, MacOS, and (as beta version, via cygwin) Windows. +The software is designed to run on multiple cores and servers and exhibits very good scalability. +MMseqs2 can run 10000 times faster than BLAST. At 100 times its speed it achieves almost the same sensitivity. +It can perform profile searches with the same sensitivity as PSI-BLAST at over 400 times its speed. + +**Usage** + +* Convert FASTA/Q file(s) to MMseqs sequence DB format + *mmseqs createdb <i:fastaFile1[.gz|.bz2]> ... <i:fastaFileN[.gz|.bz2]>|<i:stdin> <o:sequenceDB> [options]* + +* Add taxonomic labels to sequence DB + *mmseqs createtaxdb <i:sequenceDB> <tmpDir> [options]* + +* Filter taxonomy sequence database + *mmseqs filtertaxseqdb <i:taxSeqDB> <o:taxSeqDB> [options]* + +* Taxonomy assignment by computing the lowest common ancestor of homologs + *mmseqs taxonomy <i:queryDB> <i:targetDB> <o:taxaDB> <tmpDir> [options]* + +* Convert result DB to tab-separated flat file + *mmseqs createtsv <i:queryDB> [<i:targetDB>] <i:resultDB> <o:tsvFile> [options]* + +* Create a taxonomy report in Kraken or Krona format + *mmseqs taxonomyreport <i:seqTaxDB> <i:taxResultDB/resultDB/sequenceDB> <o:taxonomyReport> [options]* + +https://github.com/soedinglab/MMseqs2 + + ]]></help> + <expand macro="citations"/> +</tool> \ No newline at end of file
