comparison framebot.xml @ 0:73b3e2f98631 draft default tip

planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/rdptools commit fa8135b9b1918785c3f3b3fb7325bfe031b44ec4
author bebatut
date Mon, 16 Nov 2015 02:46:50 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:73b3e2f98631
1 <tool id="framebot" name="Framebot" version="0.1.0">
2 <description> to correct frameshifts and classify by nearest neighbor</description>
3 <!-- External dependency: the rdptools package, version 2.0.2 (presumably what defines RDP_TOOLS_DIR used in the command; confirm against the package definition) -->
4 <requirements>
5 <requirement version="2.0.2" type="package">rdptools</requirement>
6 </requirements>
7 <!-- Error detection: any exit code of 1 or above is treated as a fatal job error -->
8 <stdio>
9 <exit_code level="fatal" range="1:"/>
10 </stdio>
11 <!-- Command line: FrameBot options taken from the form, then two positional arguments (reference database, query sequences). Results use the stem "output", which the from_work_dir names in <outputs> depend on. For built-in databases, column 0 of the data table is mapped to column 2, assumed to hold the file path (confirm against the data table definition). -->
12 <command><![CDATA[
13 java -jar \${RDP_TOOLS_DIR}/FrameBot.jar framebot
14 -a $alignment_mode
15 -i $identity_cutoff
16 -k $knn
17 -l $length_cutoff
18 -t $transl_table -w $word_size
19 #if $no_metric_search.no_metric_search_test
20 -N
21 -e $no_metric_search.gap_ext_penalty
22 -f $no_metric_search.frameshift_penalty
23 -g $no_metric_search.gap_open_penalty
24 #else
25 -m $no_metric_search.max_radius
26 #end if
27
28 -o output
29 ## Positional arguments: reference database first, then the query sequences (paths are quoted)
30 #if str( $databases.databases_selector ) == 'history'
31 '$databases.databases_name'
32 #else
33 #set $data_table = dict([(_[0], _[2]) for _ in $databases.databases_input.input.options.tool_data_table.data])
34 '$data_table[$databases.databases_input.value]'
35 #end if
36
37 '$framebot_input_sequence_file'
38 ]]></command>
39 <!-- Tool form: query sequences, reference database (built-in or from the history) and FrameBot alignment options -->
40 <inputs>
41 <param name="framebot_input_sequence_file" type="data" format="fasta"
42 label="Input sequence file" help=""/>
43 <!-- Reference database: an entry of the framebot_ref_gene_databases data table, or a fasta dataset from the history -->
44 <conditional name="databases">
45 <param name="databases_selector" type="select" label="Databases to query" help="">
46 <option value="cached" selected="true">Public reference gene databases</option>
47 <option value="history">Databases from your history</option>
48 </param>
49 <when value="cached">
50 <param name="databases_input" type="select" display="radio" label="Reference gene databases">
51 <options from_data_table="framebot_ref_gene_databases"/>
52 <validator type="no_options" message="Select at least one database"/>
53 </param>
54 </when>
55 <when value="history">
56 <param name="databases_name" type="data" format="fasta" label="Reference gene database"
57 multiple="false" help=""/>
58 </when>
59 </conditional>
60 <!-- The first option (glocal) is marked as the explicit default -->
61 <param name="alignment_mode" type="select" display="radio"
62 label="Alignment mode" help="">
63 <option value="glocal" selected="true">Glocal</option>
64 <option value="local">Local</option>
65 <option value="global">Global</option>
66 </param>
67
68 <param name="identity_cutoff" type="float" min="0" max="1" value="0.4"
69 label="Percent identity cutoff" help="Fraction between 0 and 1"/>
70 <!-- The penalties are used when metric search is disabled; the maximum radius is used when it is enabled -->
71 <conditional name="no_metric_search">
72 <param name="no_metric_search_test" type="boolean" checked="true"
73 label="Disable metric search?"
74 help="Provide fasta file of seeds instead of index file"/>
75 <when value="true">
76 <param name="gap_ext_penalty" type="integer" min="-10" max="0"
77 value="-1" label="Gap extension penalty" help=""/>
78 <param name="frameshift_penalty" type="integer" min="-20" max="0"
79 value="-10" label="Frameshift penalty" help=""/>
80 <param name="gap_open_penalty" type="integer" min="-20" max="0"
81 value="-10" label="Gap opening penalty" help=""/>
82 </when>
83 <when value="false"> <!-- integer, so the value is rendered as 100 and not 100.0 on the command line -->
84 <param name="max_radius" type="integer" min="1" max="2147483647"
85 value="100" label="Maximum radius" help="Whole number between 1 and 2147483647"/>
86 </when>
87 </conditional>
88
89 <param name="knn" type="integer" min="0" max="100" value="10"
90 label="The top k closest protein targets" help=""/>
91 <param name="length_cutoff" type="integer" min="0" max="100" value="80"
92 label="Length cutoff in number of amino acids" help=""/>
93 <!-- Option values are NCBI translation table numbers -->
94 <param name="transl_table" type="select" display="radio"
95 label="Protein translation table to use" help="NCBI Translation Tables">
96 <option value="1">Standard Code</option>
97 <option value="2">Vertebrate Mitochondrial Code</option>
98 <option value="3">Yeast Mitochondrial Code</option>
99 <option value="4">Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code</option>
100 <option value="5">Invertebrate Mitochondrial Code</option>
101 <option value="6">Ciliate, Dasycladacean and Hexamita Nuclear Code</option>
102 <option value="9">Echinoderm and Flatworm Mitochondrial Code</option>
103 <option value="10">Euplotid Nuclear Code</option>
104 <option value="11" selected="true">Bacterial, Archaeal and Plant Plastid Code</option>
105 <option value="12">Alternative Yeast Nuclear Code</option>
106 <option value="13">Ascidian Mitochondrial Code</option>
107 <option value="14">Alternative Flatworm Mitochondrial Code</option>
108 <option value="16">Chlorophycean Mitochondrial Code</option>
109 <option value="21">Trematode Mitochondrial Code</option>
110 <option value="22">Scenedesmus obliquus Mitochondrial Code</option>
111 <option value="23">Thraustochytrium Mitochondrial Code</option>
112 <option value="24">Pterobranchia Mitochondrial Code</option>
113 <option value="25">Candidate Division SR1 and Gracilibacteria Code</option>
114 </param>
115
116 <param name="word_size" type="integer" min="3" max="6" value="4"
117 label="Word size used to find closest protein target" help=""/>
118 </inputs>
119 <!-- Result datasets: each one is collected from the job working directory by file name; the names start with the stem "output" passed to -o in the command -->
120 <outputs>
121 <data format="txt" name="conserved_alignment_file"
122 from_work_dir="output_framebot.txt"
123 label="Conserved alignments to the nearest match of ${on_string} (Framebot)" />
124 <data format="fasta" name="corr_nucl_output"
125 from_work_dir="output_corr_nucl.fasta"
126 label="Frameshift-corrected nucleotide sequences of ${on_string} (Framebot)" />
127 <data format="fasta" name="corr_prot_output"
128 from_work_dir="output_corr_prot.fasta"
129 label="Frameshift-corrected protein sequences of ${on_string} (Framebot)" />
130 <!-- Alignments and sequences that were rejected -->
131 <data format="txt" name="failed_alignment_file"
132 from_work_dir="output_failed_framebot.txt"
133 label="Rejected alignments to the nearest match of ${on_string} (Framebot)" />
134 <data format="fasta" name="failed_nucl_output"
135 from_work_dir="output_failed_nucl.fasta"
136 label="Non frameshift-corrected nucleotide sequences of ${on_string} (Framebot)" />
137 </outputs>
138 <!-- Functional test: only parameters declared in <inputs> are set, each with its form default; conditional parameters are nested under their conditional -->
139 <tests>
140 <test>
141 <param name="framebot_input_sequence_file" value="framebot_input_sequence.fasta"/>
142 <conditional name="databases">
143 <param name="databases_selector" value="cached"/>
144 <param name="databases_input" value="amoA_prot_ref"/>
145 </conditional>
146 <param name="alignment_mode" value="glocal"/>
147 <param name="identity_cutoff" value="0.4"/>
148 <conditional name="no_metric_search">
149 <param name="no_metric_search_test" value="true"/>
150 <param name="gap_ext_penalty" value="-1"/>
151 <param name="frameshift_penalty" value="-10"/>
152 <param name="gap_open_penalty" value="-10"/>
153 </conditional>
154 <param name="knn" value="10"/>
155 <param name="length_cutoff" value="80"/>
156 <param name="transl_table" value="11"/>
157 <param name="word_size" value="4"/>
158
159 <!-- Expected content of the five result datasets -->
160 <output name="conserved_alignment_file" file="framebot_conserved_alignment_file.txt"/>
161 <output name="corr_nucl_output" file="framebot_corr_nucl_output.fasta"/>
162 <output name="corr_prot_output" file="framebot_corr_prot_output.fasta"/>
163 <output name="failed_alignment_file" file="framebot_failed_alignment_file.txt"/>
164 <output name="failed_nucl_output" file="framebot_failed_nucl_output.fasta"/>
165 </test>
166 </tests>
167
168 <help><![CDATA[
169
170 **What it does**
171
172 RDP FrameBot is a frameshift correction and nearest neighbor classification tool for use with high-throughput amplicon sequencing. It uses a dynamic programming algorithm to align each query DNA sequence against a set of target protein sequences, produces frameshift-corrected protein and DNA sequences and an optimal global or local protein alignment.
173 More information is available in the `GitHub repository <https://github.com/rdpstaff/Framebot>`_.
174
175 -----
176
177 **Input**
178
179 One protein reference fasta file or index file, and one DNA query fasta file are required.
180
181 Several reference sets for a list of genes are available.
182 However, you can also provide your own set of reference sequences representative of the gene of interest (`http://fungene.cme.msu.edu <http://fungene.cme.msu.edu>`_ is a good resource).
183 The reference set must contain protein or DNA representative sequences of the gene target and should be compiled to have a good coverage of diversity of the gene family. FrameBot is significantly more accurate when the nearest target protein sequence (from the reference set) is at least 50% identical to the query read. Running FrameBot is computationally intensive in no-metric-search mode because it performs all-against-all comparisons between query DNA and the target protein sequences. Therefore we recommend limiting your reference set to 200 protein sequences for no-metric-search mode. The index metric-search mode gains more than 10-fold speedup by reducing the number of comparisons (see FrameBot citation). A larger DNA reference set can be used.
184
185 -----
186
187 **Parameters**
188
189 FrameBot has numerous parameters:
190
191 - The alignment mode: glocal, local or global
192 - The minimum abundance for de-novo mode
193 - The maximum aa identity cutoff for de-novo mode
194 - The Percent identity cutoff
195 - The top k closest protein targets
196 - Length cutoff in number of amino acids
197 - Disable metric search (provide fasta file of seeds instead of index file)
198 - Result file name stem
199 - Disable the pre-filtering step for non-metric search
200 - Sequence quality data
201 - Protein translation table to use
202 - The word size used to find closest protein targets
203 - ...
204
205 -----
206
207 **Output**
208
209 The framebot step produces five output files:
210
211 - the alignment to the nearest match satisfying the minimum length and protein identity cutoff.
212 - the frameshift-corrected nucleotide sequences and the frameshift-corrected protein sequences (two files) satisfying the minimum length and protein identity cutoff.
213 - the alignment to the nearest match that failed the minimum length and protein identity cutoff.
214 - a fasta file containing the nucleotide sequences that failed the minimum length and protein identity cutoff.
215
216 ]]></help>
217 <!-- Reference to cite, given as a DOI (presumably the "FrameBot citation" mentioned in the help text; confirm) -->
218 <citations>
219 <citation type="doi">10.1128/mBio.00592-13</citation>
220 </citations>
221 </tool>