Mercurial > repos > greg > data_manager_malt_index_builder
diff data_manager/malt_index_builder.xml @ 0:d69ebf52c233 draft
Uploaded
author | greg |
---|---|
date | Tue, 12 Oct 2021 14:15:35 +0000 |
parents | |
children | 787f1ca9045a |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/malt_index_builder.xml Tue Oct 12 14:15:35 2021 +0000 @@ -0,0 +1,95 @@ +<tool id="data_manager_malt_index_builder" name="MALT index builder" tool_type="manage_data" version="0.5.3+galaxy0" profile="21.01"> + <description></description> + <requirements> + <requirement type="package" version="0.53">malt</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ +python '$__tool_directory__/malt_index_builder.py' +'${out_file}' +--fasta_filename '${all_fasta_source.fields.path}' +--fasta_dbkey '${all_fasta_source.fields.dbkey}' +--fasta_description '${all_fasta_source.fields.name}' +--sequence_type '${sequence_type}' +#if str($seed_setting_cond.seed_setting) == 'yes': + --shapes '${seed_setting_cond.shapes}' + --max_hits_per_seed $seed_setting_cond.max_hits_per_seed + --protein_reduct '${seed_setting_cond.protein_reduct}' +#end if + ]]></command> + <inputs> + <param name="all_fasta_source" type="select" label="Source FASTA Sequence"> + <options from_data_table="all_fasta"/> + </param> + <param name="sequence_name" type="text" value="" label="Name of sequence" /> + <param name="sequence_id" type="text" value="" label="ID for sequence" /> + <param name="sequence_type" type="select" label="Specify whether the reference sequences are DNA or Protein sequences" help="Use the DNA setting For RNA sequences"> + <option value="DNA" selected="true">DNA</option> + <option value="Protein">Protein</option> + </param> + <conditional name="seed_setting_cond"> + <param name="seed_setting" type="select" label="Specify seed settings?"> + <option selected="true" value="no">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"/> + <when value="yes"> + <param name="shapes" type="text" value="" label="Comma-separated list of seed shapes" help="See help text below"> + <sanitizer invalid_char=""> + <valid initial="string.printable"> + <remove value="'" /> + </valid> + <mapping initial="none"> + <add source="'" target="'"'"'" /> + </mapping> + </sanitizer> + </param> + <param name="max_hits_per_seed" type="integer" value="1" min="1" label="Maximum number of hits per seed"/> + <param name="protein_reduct" type="select" label="Name or definition of protein alphabet reduction"> + <option selected="true" value="DIAMOND_11">DIAMOND_11</option> + <option value="BLOSUM50_10">BLOSUM50_10</option> + <option value="BLOSUM50_11">BLOSUM50_11</option> + <option value="BLOSUM50_15">BLOSUM50_15</option> + <option value="BLOSUM50_4">BLOSUM50_4</option> + <option value="BLOSUM50_8">BLOSUM50_8</option> + <option value="GBMR4">GBMR4</option> + <option value="HSDM17">HSDM17</option> + <option value="MALT_10">MALT_10</option> + <option value="SDM12">SDM12</option> + <option value="UNREDUCED">UNREDUCED</option> + </param> + </when> + </conditional> + </inputs> + <outputs> + <data name="out_file" format="data_manager_json" /> + </outputs> + <tests> + <test> + <param name="all_fasta_source" value="phiX174"/> + <output name="out_file" value="malt_index_builder.json"/> + </test> + </tests> + <help> +.. class:: infomark + +**Notice:** Values for name, description, and id will be generated automatically if left blank. + +**What it does** + +Takes a reference sequence database (represented by a FastA file, possibly in gzip format) as input and produces an index that +can be used by the malt tool as input. If MALT is to be used as a taxonomic and/or functional analysis tool as well as an +alignment tool, then this MALT index builder tool must be provided with a number of mapping files that are used to map reference +sequences to taxonomic or functional classes or to locate genes in DNA reference sequences. + +**Options** + + * **Specify seed settings** - specify the settings for controlling how MALT uses its seed-and-extend approach based on “spaced seeds”. + + * **Shapes** - specify the seed shapes used. For DNA sequences, the default seed shape is: 111110111011110110111111. For protein sequences, by default MALT uses the following four shapes: 111101101110111, 1111000101011001111, 11101001001000100101111 and 11101001000010100010100111. + * **Maximim hits per seed** - specify the maximum number of hits per seed - MALT uses this to calculate a maximum number of hits per hash value. + * **Protein reduction** - specify the alphabet reduction in the case of protein reference sequences. By default, MALT reduces amino acids to 8 different letters, grouped as follows: [LVIMC] [AG] [ST] [P] [FYW] [EDNQ] [KR] [H]. + </help> + <citations> + <citation type="doi">https://doi.org/10.1101/050559</citation> + </citations> +</tool>