annotate data_manager/malt_index_builder.xml @ 1:787f1ca9045a draft default tip

Uploaded
author greg
date Wed, 13 Oct 2021 20:12:48 +0000
parents d69ebf52c233
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d69ebf52c233 Uploaded
greg
parents:
diff changeset
1 <tool id="data_manager_malt_index_builder" name="MALT index builder" tool_type="manage_data" version="0.5.3+galaxy0" profile="21.01">
d69ebf52c233 Uploaded
greg
parents:
diff changeset
2 <description></description>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
3 <requirements>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
4 <requirement type="package" version="0.53">malt</requirement>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
5 </requirements>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
6 <command detect_errors="exit_code"><![CDATA[
d69ebf52c233 Uploaded
greg
parents:
diff changeset
7 python '$__tool_directory__/malt_index_builder.py'
d69ebf52c233 Uploaded
greg
parents:
diff changeset
8 '${out_file}'
d69ebf52c233 Uploaded
greg
parents:
diff changeset
9 --fasta_filename '${all_fasta_source.fields.path}'
d69ebf52c233 Uploaded
greg
parents:
diff changeset
10 --fasta_dbkey '${all_fasta_source.fields.dbkey}'
d69ebf52c233 Uploaded
greg
parents:
diff changeset
11 --fasta_description '${all_fasta_source.fields.name}'
d69ebf52c233 Uploaded
greg
parents:
diff changeset
12 --sequence_type '${sequence_type}'
1
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
13 #if str($protein_reduct_setting_cond.protein_reduct_setting) == 'yes':
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
14 --protein_reduct '${protein_reduct_setting_cond.protein_reduct}'
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
15 #end if
0
d69ebf52c233 Uploaded
greg
parents:
diff changeset
16 #if str($seed_setting_cond.seed_setting) == 'yes':
d69ebf52c233 Uploaded
greg
parents:
diff changeset
17 --shapes '${seed_setting_cond.shapes}'
1
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
18 ## malt-build requires a string here.
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
19 --max_hits_per_seed '${seed_setting_cond.max_hits_per_seed}'
0
d69ebf52c233 Uploaded
greg
parents:
diff changeset
20 #end if
d69ebf52c233 Uploaded
greg
parents:
diff changeset
21 ]]></command>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
22 <inputs>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
23 <param name="all_fasta_source" type="select" label="Source FASTA Sequence">
d69ebf52c233 Uploaded
greg
parents:
diff changeset
24 <options from_data_table="all_fasta"/>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
25 </param>
1
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
26 <param name="sequence_name" type="text" value="" label="Name of sequence"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
27 <param name="sequence_id" type="text" value="" label="ID for sequence"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
28 <param name="sequence_type" type="select" label="Reference sequences type" help="Use the DNA setting for RNA sequences">
0
d69ebf52c233 Uploaded
greg
parents:
diff changeset
29 <option value="DNA" selected="true">DNA</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
30 <option value="Protein">Protein</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
31 </param>
1
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
32 <conditional name="protein_reduct_setting_cond">
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
33 <param name="protein_reduct_setting" type="select" label="Specify protein alphabet reduction?" help="Used only if the reference sequences are Protein sequences">
0
d69ebf52c233 Uploaded
greg
parents:
diff changeset
34 <option selected="true" value="no">No</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
35 <option value="yes">Yes</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
36 </param>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
37 <when value="no"/>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
38 <when value="yes">
d69ebf52c233 Uploaded
greg
parents:
diff changeset
39 <param name="protein_reduct" type="select" label="Name or definition of protein alphabet reduction">
d69ebf52c233 Uploaded
greg
parents:
diff changeset
40 <option selected="true" value="DIAMOND_11">DIAMOND_11</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
41 <option value="BLOSUM50_10">BLOSUM50_10</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
42 <option value="BLOSUM50_11">BLOSUM50_11</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
43 <option value="BLOSUM50_15">BLOSUM50_15</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
44 <option value="BLOSUM50_4">BLOSUM50_4</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
45 <option value="BLOSUM50_8">BLOSUM50_8</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
46 <option value="GBMR4">GBMR4</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
47 <option value="HSDM17">HSDM17</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
48 <option value="MALT_10">MALT_10</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
49 <option value="SDM12">SDM12</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
50 <option value="UNREDUCED">UNREDUCED</option>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
51 </param>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
52 </when>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
53 </conditional>
1
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
54 <conditional name="seed_setting_cond">
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
55 <param name="seed_setting" type="select" label="Specify seed settings?">
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
56 <option selected="true" value="no">No</option>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
57 <option value="yes">Yes</option>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
58 </param>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
59 <when value="no"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
60 <when value="yes">
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
61 <param name="shapes" type="text" value="" label="Comma-separated list of seed shapes" help="See help text below">
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
62 <sanitizer invalid_char="">
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
63 <valid initial="string.printable">
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
64 <remove value="&apos;"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
65 </valid>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
66 <mapping initial="none">
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
67 <add source="&apos;" target="&apos;&quot;&apos;&quot;&apos;"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
68 </mapping>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
69 </sanitizer>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
70 </param>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
71 <param name="max_hits_per_seed" type="integer" value="1" min="1" label="Maximum number of hits per seed"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
72 </when>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
73 </conditional>
0
d69ebf52c233 Uploaded
greg
parents:
diff changeset
74 </inputs>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
75 <outputs>
1
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
76 <data name="out_file" format="data_manager_json"/>
0
d69ebf52c233 Uploaded
greg
parents:
diff changeset
77 </outputs>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
78 <tests>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
79 <test>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
80 <param name="all_fasta_source" value="phiX174"/>
1
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
81 <param name="sequence_name" value="Pretty name for phiX174"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
82 <param name="sequence_id" value="phiX174-1"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
83 <output name="out_file" value="malt_index_builder1.json"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
84 </test>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
85 <test>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
86 <param name="all_fasta_source" value="phiX174"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
87 <param name="sequence_name" value="Pretty name for phiX174"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
88 <param name="sequence_id" value="phiX174-1"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
89 <param name="seed_setting" value="yes"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
90 <param name="shapes" value="111110111011110110111111"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
91 <output name="out_file" value="malt_index_builder1.json"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
92 </test>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
93 <test>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
94 <param name="all_fasta_source" value="phiX174"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
95 <param name="sequence_name" value="Pretty name for phiX174"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
96 <param name="sequence_id" value="phiX174-2"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
97 <param name="sequence_type" value="Protein"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
98 <param name="protein_reduct_setting" value="yes"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
99 <param name="protein_reduct" value="BLOSUM50_10"/>
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
100 <output name="out_file" value="malt_index_builder2.json"/>
0
d69ebf52c233 Uploaded
greg
parents:
diff changeset
101 </test>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
102 </tests>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
103 <help>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
104 .. class:: infomark
d69ebf52c233 Uploaded
greg
parents:
diff changeset
105
1
787f1ca9045a Uploaded
greg
parents: 0
diff changeset
106 **Notice:** Values for Name and ID of sequence will be generated automatically if left blank.
0
d69ebf52c233 Uploaded
greg
parents:
diff changeset
107
d69ebf52c233 Uploaded
greg
parents:
diff changeset
108 **What it does**
d69ebf52c233 Uploaded
greg
parents:
diff changeset
109
d69ebf52c233 Uploaded
greg
parents:
diff changeset
110 Takes a reference sequence database (represented by a FastA file, possibly in gzip format) as input and produces an index that
d69ebf52c233 Uploaded
greg
parents:
diff changeset
111 can be used by the malt tool as input. If MALT is to be used as a taxonomic and/or functional analysis tool as well as an
d69ebf52c233 Uploaded
greg
parents:
diff changeset
112 alignment tool, then this MALT index builder tool must be provided with a number of mapping files that are used to map reference
d69ebf52c233 Uploaded
greg
parents:
diff changeset
113 sequences to taxonomic or functional classes or to locate genes in DNA reference sequences.
d69ebf52c233 Uploaded
greg
parents:
diff changeset
114
d69ebf52c233 Uploaded
greg
parents:
diff changeset
115 **Options**
d69ebf52c233 Uploaded
greg
parents:
diff changeset
116
d69ebf52c233 Uploaded
greg
parents:
diff changeset
117 * **Specify seed settings** - specify the settings for controlling how MALT uses its seed-and-extend approach based on “spaced seeds”.
d69ebf52c233 Uploaded
greg
parents:
diff changeset
118
d69ebf52c233 Uploaded
greg
parents:
diff changeset
119 * **Shapes** - specify the seed shapes used. For DNA sequences, the default seed shape is: 111110111011110110111111. For protein sequences, by default MALT uses the following four shapes: 111101101110111, 1111000101011001111, 11101001001000100101111 and 11101001000010100010100111.
d69ebf52c233 Uploaded
greg
parents:
diff changeset
120 * **Maximum hits per seed** - specify the maximum number of hits per seed - MALT uses this to calculate a maximum number of hits per hash value.
d69ebf52c233 Uploaded
greg
parents:
diff changeset
121 * **Protein reduction** - specify the alphabet reduction in the case of protein reference sequences. By default, MALT reduces amino acids to 8 different letters, grouped as follows: [LVIMC] [AG] [ST] [P] [FYW] [EDNQ] [KR] [H].
d69ebf52c233 Uploaded
greg
parents:
diff changeset
122 </help>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
123 <citations>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
124 <citation type="doi">10.1101/050559</citation>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
125 </citations>
d69ebf52c233 Uploaded
greg
parents:
diff changeset
126 </tool>