41
|
1 <tool id="plant_tribes_kaks_analysis" name="KaKsAnalysis" version="@WRAPPER_VERSION@.3.0">
|
25
|
2 <description>estimates paralogous and orthologous pairwise synonymous (Ks) and non-synonymous (Ka) substitution rates</description>
|
26
|
3 <macros>
|
|
4 <import>macros.xml</import>
|
|
5 </macros>
|
41
|
6 <requirements>
|
|
7 <requirement type="package" version="1.0.3">plant_tribes_kaks_analysis</requirement>
|
|
8 </requirements>
|
37
|
<!-- Builds the KaKsAnalysis command line from the tool parameters, redirects the
     program's console output to proc.log, then moves the result files from the
     working directory into the Galaxy output datasets. -->
<command detect_errors="exit_code"><![CDATA[
## Directory that the mv commands below collect result files from.
#set output_dir = 'kaksAnalysis_dir'
#set comparison = $comparison_cond.comparison
## Flatten the nested advanced-option conditionals into local variables.
## In basic mode the selectors set in the else branch default to 'no'.
#if str($options_type.options_type_selector) == 'advanced':
    #set codeml_ctl_file_cond = $options_type.codeml_ctl_file_cond
    #set codeml_ctl_file_select = $codeml_ctl_file_cond.codeml_ctl_file_select
    #set fit_components_cond = $options_type.fit_components_cond
    #set fit_components = $fit_components_cond.fit_components
    #set recalibrate_cond = $options_type.recalibrate_cond
    #set recalibrate = $recalibrate_cond.recalibrate
    #set set_min_coverage_cond = $options_type.set_min_coverage_cond
    #set set_min_coverage = $set_min_coverage_cond.set_min_coverage
    #set set_lower_ks_limit_cond = $options_type.set_lower_ks_limit_cond
    #set set_lower_ks_limit = $set_lower_ks_limit_cond.set_lower_ks_limit
    #set set_upper_ks_limit_cond = $options_type.set_upper_ks_limit_cond
    #set set_upper_ks_limit = $set_upper_ks_limit_cond.set_upper_ks_limit
#else:
    #set codeml_ctl_file_select = 'no'
    #set fit_components = 'no'
    #set set_lower_ks_limit = 'no'
    #set set_upper_ks_limit = 'no'
#end if
KaKsAnalysis
--num_threads \${GALAXY_SLOTS:-4}
--coding_sequences_species_1 '$coding_sequences_species_1'
--proteins_species_1 '$proteins_species_1'
--comparison $comparison
## Second-species inputs and the BLAST flavour only apply to orthologous comparisons.
#if str($comparison) == 'orthologs':
    --coding_sequences_species_2 '$comparison_cond.coding_sequences_species_2'
    --proteins_species_2 '$comparison_cond.proteins_species_2'
    #if str($comparison_cond.blast_option) == 'crbblast':
        --crb_blast
    #end if
#end if
#if str($options_type.options_type_selector) == 'advanced':
    #if str($set_min_coverage) == 'yes':
        --min_coverage $set_min_coverage_cond.min_coverage
    #end if
    #if str($recalibrate) == 'yes':
        --recalibration_rate $recalibrate_cond.recalibration_rate
    #end if
    ## No else block needed here because the default codeml_ctl config
    ## will be used if the --codeml_ctl_file flag is missing.
    ## (Cheetah comments must start with '##'; a lone '#' is emitted into the
    ## command text, where the shell treats the rest of the line as a comment.)
    #if str($codeml_ctl_file_select) == 'yes':
        --codeml_ctl_file '$codeml_ctl_file_cond.codeml_ctl_file'
    #end if
    #if str($fit_components) == 'yes':
        --fit_components
        --num_of_components $fit_components_cond.num_of_components
    #end if
    #if str($set_lower_ks_limit) == 'yes':
        --min_ks $set_lower_ks_limit_cond.min_ks
    #end if
    #if str($set_upper_ks_limit) == 'yes':
        --max_ks $set_upper_ks_limit_cond.max_ks
    #end if
#end if
&>proc.log
## Move the result files onto the Galaxy output datasets.
&& mv $output_dir/species1.fna '$output_species1_fna'
&& mv $output_dir/species1.faa '$output_species1_faa'
#if str($comparison) == 'paralogs':
    && mv $output_dir/species1.fna.blastn.paralogs '$output_species1_paralog'
    && mv $output_dir/*.rbhb '$output_rbhb_paralog'
#else:
    && mv $output_dir/species2.faa '$output_species2_faa'
    && mv $output_dir/species2.fna '$output_species2_fna'
    && mv $output_dir/species1.fna.blastn.orthologs '$output_species1_ortholog'
    && mv $output_dir/species2.fna.blastn.orthologs '$output_species2_ortholog'
    && mv $output_dir/*.rbhb '$output_rbhb_ortholog'
#end if
&& mv $output_dir/*.kaks '$output_kaks'
#if str($fit_components) == 'yes':
    && mv $output_dir/*.components '$output_components'
#end if
]]></command>
|
0
|
<!-- Tool parameters: species 1 sequences, the comparison type (which adds the
     species 2 inputs for orthologs), optional advanced settings, and the
     non-commercial-use confirmation. -->
<inputs>
    <param name="coding_sequences_species_1" format="fasta" type="data" label="Coding sequences for the first species" />
    <param name="proteins_species_1" format="fasta" type="data" label="Protein sequences for the first species" />
    <!-- Orthologous comparisons need a second species and a BLAST strategy. -->
    <conditional name="comparison_cond">
        <param name="comparison" type="select" label="Type of sequence comparison">
            <option value="paralogs" selected="true">Paralogous</option>
            <option value="orthologs">Orthologous</option>
        </param>
        <when value="paralogs" />
        <when value="orthologs">
            <param name="coding_sequences_species_2" format="fasta" type="data" label="Coding sequences for the second species" />
            <param name="proteins_species_2" format="fasta" type="data" label="Protein sequences for the second species" />
            <param name="blast_option" type="select" display="radio" label="Determine for cross-species orthologs using">
                <option value="blast" selected="true">reciprocal best BLAST</option>
                <option value="crbblast">conditional reciprocal best BLAST</option>
            </param>
        </when>
    </conditional>
    <!-- Each advanced setting is a yes/no conditional; its value parameter is
         only shown when "yes" is selected. -->
    <conditional name="options_type">
        <param name="options_type_selector" type="select" label="Options Configuration">
            <option value="basic" selected="true">Basic</option>
            <option value="advanced">Advanced</option>
        </param>
        <when value="basic" />
        <when value="advanced">
            <conditional name="set_min_coverage_cond">
                <param name="set_min_coverage" type="select" label="Alignment coverage configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="min_coverage" type="float" value="0.5" min="0.3" max="1.0" label="match score" />
                </when>
            </conditional>
            <conditional name="recalibrate_cond">
                <param name="recalibrate" type="select" label="Species rates recalibration configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="recalibration_rate" type="float" value="0.0" min="0.0" label="Recalibration rate" />
                </when>
            </conditional>
            <conditional name="codeml_ctl_file_cond">
                <param name="codeml_ctl_file_select" type="select" label="PAML codeml configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="codeml_ctl_file" format="txt" type="data" label="PAML codeml control file" />
                </when>
            </conditional>
            <conditional name="fit_components_cond">
                <param name="fit_components" type="select" label="Rates clustering configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="num_of_components" type="integer" value="1" min="1" label="Number of components" />
                </when>
            </conditional>
            <!-- Label spelling matches the option name used in the help text. -->
            <conditional name="set_lower_ks_limit_cond">
                <param name="set_lower_ks_limit" type="select" label="Lower limit synonymous substitution rates configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="min_ks" type="float" value="0.0" min="0.0" label="Minimum rate" />
                </when>
            </conditional>
            <!-- Label spelling matches the option name used in the help text. -->
            <conditional name="set_upper_ks_limit_cond">
                <param name="set_upper_ks_limit" type="select" label="Upper limit synonymous substitution rates configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="max_ks" type="float" value="0.0" min="0.0" label="Maximum rate" />
                </when>
            </conditional>
        </when>
    </conditional>
    <!-- Required due to the Emmix license -->
    <param name="non_commercial_use" label="I certify that I am not using this tool for commercial purposes." type="boolean" truevalue="NON_COMMERCIAL_USE" falsevalue="COMMERCIAL_USE" checked="False">
        <!-- Rejects the form unless the box is ticked. -->
        <validator type="expression" message="This tool is only available for non-commercial use.">value == True</validator>
    </param>
</inputs>
|
|
<!-- Output datasets. Each filter expression limits a dataset to the
     comparison type or advanced option named in it. -->
<outputs>
    <!-- Species 1 sequence files (no filter). -->
    <data name="output_species1_fna"
          format="fasta"
          label="${tool.name} (coding sequences species1) on ${on_string}" />
    <data name="output_species1_faa"
          format="fasta"
          label="${tool.name} (amino acids species1) on ${on_string}" />

    <!-- Species 2 sequence files: orthologous comparisons only. -->
    <data name="output_species2_fna"
          format="fasta"
          label="${tool.name} (coding sequences species2) on ${on_string}">
        <filter>comparison_cond['comparison'] == 'orthologs'</filter>
    </data>
    <data name="output_species2_faa"
          format="fasta"
          label="${tool.name} (amino acids species2) on ${on_string}">
        <filter>comparison_cond['comparison'] == 'orthologs'</filter>
    </data>

    <!-- blastn result tables, one set per comparison type. -->
    <data name="output_species1_paralog"
          format="tabular"
          label="${tool.name} (blastn results species1 vs species1) on ${on_string}">
        <filter>comparison_cond['comparison'] == 'paralogs'</filter>
    </data>
    <data name="output_species1_ortholog"
          format="tabular"
          label="${tool.name} (blastn results species1 vs species2) on ${on_string}">
        <filter>comparison_cond['comparison'] == 'orthologs'</filter>
    </data>
    <data name="output_species2_ortholog"
          format="tabular"
          label="${tool.name} (blastn results species2 vs species1) on ${on_string}">
        <filter>comparison_cond['comparison'] == 'orthologs'</filter>
    </data>

    <!-- Homologous pair tables, one per comparison type. -->
    <data name="output_rbhb_paralog"
          format="tabular"
          label="${tool.name} (paralogous pairs) on ${on_string}">
        <filter>comparison_cond['comparison'] == 'paralogs'</filter>
    </data>
    <data name="output_rbhb_ortholog"
          format="tabular"
          label="${tool.name} (orthologous pairs) on ${on_string}">
        <filter>comparison_cond['comparison'] == 'orthologs'</filter>
    </data>

    <!-- KaKs distribution (no filter). -->
    <data name="output_kaks"
          format="tabular"
          label="${tool.name} (KaKs distribution) on ${on_string}" />

    <!-- Components file: advanced mode with rates clustering set to "yes". -->
    <data name="output_components"
          format="ptkscmp"
          label="${tool.name} (significant components in the KaKs distribution) on ${on_string}">
        <filter>options_type['options_type_selector'] == 'advanced' and options_type['fit_components_cond']['fit_components'] == 'yes'</filter>
    </data>
</outputs>
|
|
<!-- One test case. It sets only the species 1 inputs and the non-commercial
     confirmation, so every other parameter takes its declared default. -->
<tests>
    <test>
        <!-- Inputs -->
        <param name="coding_sequences_species_1" value="species1_cds.fasta" ftype="fasta"/>
        <param name="proteins_species_1" value="species1_pep.fasta" ftype="fasta"/>
        <param name="non_commercial_use" value="True"/>

        <!-- Expected outputs -->
        <output name="output_species1_fna" file="species1_cds.fasta" ftype="fasta" compare="contains"/>
        <output name="output_species1_faa" file="species1_pep.fasta" ftype="fasta" compare="contains"/>
        <output name="output_species1_paralog" file="output_blastn_results1.tabular" ftype="tabular" compare="contains"/>
        <output name="output_rbhb_paralog" file="output_paralogous_pairs.tabular" ftype="tabular"/>
        <output name="output_kaks" file="output1.tabular" ftype="tabular"/>
    </test>
</tests>
|
|
217 <help>
|
27
|
218 This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary analyses
|
|
219 of genome-scale gene families and transcriptomes. This tool estimates paralogous and orthologous pairwise synonymous (Ks) and
|
|
220 non-synonymous (Ka) substitution rates for a set of gene coding sequences either produced by the AssemblyPostProcessor tool or
|
|
221 from an external source. Optionally, the resulting set of estimated Ks values can be clustered into components using a mixture
|
|
222 of multivariate normal distributions to identify significant duplication event(s) in a species or a pair of species.
|
0
|
223
|
|
224 -----
|
|
225
|
34
|
226 * **Required options**
|
0
|
227
|
29
|
228 - **Coding sequences for the first species** - coding sequence fasta file for the first species either produced by the AssemblyPostProcessor tool or from an external source selected from your history.
|
|
229 - **Protein sequences for the first species** - corresponding protein sequence fasta files for the first species either produced by the AssemblyPostProcessor tool or from an external source selected from your history.
|
34
|
230 - **Type of sequence comparison** - pairwise sequence comparison to determine homologous pairs. This can be either paralogous for self-species comparison or orthologous for cross-species comparison. Cross-species comparison requires input for the second species.
|
0
|
231
|
34
|
232 * **Other options**
|
0
|
233
|
29
|
234 - **Coding sequences for the second species** - coding sequence fasta file for the second species either produced by the AssemblyPostProcessor tool or from an external source selected from your history. This option is required only for orthologous comparison.
|
|
235 - **Protein sequences for the second species** - corresponding protein sequence fasta files for the second species either produced by the AssemblyPostProcessor tool or from an external source selected from your history. This option is required only for orthologous comparison.
|
42
|
236
|
|
237 - **Determine for cross-species orthologs using** - select option for blast orthology.
|
|
238
|
|
239 - **reciprocal best BLAST** - use the default stringent reciprocal BLAST package for orthology assignment.
|
|
240 - **conditional reciprocal best BLAST** - use the CRB-BLAST package for orthology assignment which increases sensitivity to orthology comparisons and determines additional cross-species orthologs that are being left out by the default stringent reciprocal BLAST.
|
|
241
|
27
|
242 - **Alignment coverage configuration** - select 'Yes' to set the minimum allowable alignment coverage length between homologous pairs. PlantTribes uses global codon alignment match score to determine the pairwise alignment coverage. By default, the match score is set to 0.5 if 'No' is selected.
|
|
243
|
|
244 - **match score** - number of base matches in a pairwise sequence alignment divided by the length of shorter sequence. Positions in the alignment corresponding to gaps are not considered. The score is restricted to the range 0.3 - 1.0.
|
|
245
|
31
|
246 - **Species rates recalibration configuration** - select 'Yes' to recalibrate synonymous substitution rates of a species using a predetermined evolutionary rate. Recalibration evolutionary rate can be determined from a species tree inferred from a collection of conserved single copy genes from taxa of interest as described in [7]. Rate recalibration applies only to paralogous comparisons.
|
27
|
247
|
|
248 - **recalibration rate** - a predetermined evolutionary recalibration rate.
|
|
249
|
|
250 - **PAML codeml configuration** - select 'Yes' to enable selection of a PAML codeml control file to carry out maximum likelihood analysis of protein-coding DNA sequences using codon substitution models. Template file "codeml.ctl.args" can be found in the scaffold data installed into Galaxy via the PlantTribes Scaffolds Download Data Manager tool, and is also available at the PlantTribes GitHub `repository`_. Default settings shown in the template are used if 'No' is selected.
|
|
251 - **Rates clustering configuration** - select 'Yes' to estimate clusters of synonymous substitution rates using a mixture of multivariate normal distributions which represent putative duplication event(s).
|
|
252
|
|
253 - **Number of components** - number of components to include in the normal mixture model.
|
|
254
|
34
|
255 - **Lower limit synonymous substitution rates configuration** - select 'Yes' to set the minimum allowable synonymous substitution rate to use in the normal mixtures cluster analysis to exclude young paralogs that arise from normal gene births and deaths in a genome.
|
27
|
256
|
|
257 - **Minimum rate** - minimum allowable synonymous substitution rate.
|
|
258
|
34
|
259 - **Upper limit synonymous substitution rates configuration** - select 'Yes' to set the maximum allowable synonymous substitution rate to use in the normal mixtures cluster analysis to exclude likely ancient paralogs in a genome.
|
27
|
260
|
28
|
261 - **Maximum rate** - maximum allowable synonymous substitution rate.
|
27
|
262
|
28
|
263 .. _repository: https://github.com/dePamphilis/PlantTribes/blob/master/config/codeml.ctl.args
|
0
|
264
|
|
265 </help>
|
|
<!-- Citations: one entry expanded from the imported macros plus six inline
     BibTeX entries. The leading numbers in the author fields are kept as
     written; the help text cites one of them as "[7]". -->
<citations>
    <expand macro="citation1" />
    <citation type="bibtex">
        @article{Wall2008,
        journal = {Nucleic Acids Research},
        author = {2. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS},
        title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
        year = {2008},
        volume = {36},
        number = {suppl 1},
        pages = {D970-D976},}
    </citation>
    <citation type="bibtex">
        @article{Altschul1990,
        journal = {Journal of molecular biology},
        author = {3. Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ},
        title = {Basic local alignment search tool},
        year = {1990},
        volume = {215},
        number = {3},
        pages = {403-410},}
    </citation>
    <citation type="bibtex">
        @article{Katoh2013,
        journal = {Molecular biology and evolution},
        author = {4. Katoh K, Standley DM},
        title = {MAFFT multiple sequence alignment software version 7: improvements in performance and usability},
        year = {2013},
        volume = {30},
        number = {4},
        pages = {772-780},}
    </citation>
    <citation type="bibtex">
        @article{Yang2007,
        journal = {Molecular biology and evolution},
        author = {5. Yang Z},
        title = {PAML 4: phylogenetic analysis by maximum likelihood},
        year = {2007},
        volume = {24},
        number = {8},
        pages = {1586-1591},}
    </citation>
    <citation type="bibtex">
        @article{McLachlan1999,
        journal = {Journal of Statistical Software},
        author = {6. McLachlan GJ, Peel D, Basford KE, Adams P},
        title = {The EMMIX software for the fitting of mixtures of normal and t-components},
        year = {1999},
        volume = {4},
        number = {2},
        pages = {1-14},}
    </citation>
    <citation type="bibtex">
        @article{Cui2006,
        journal = {Genome Research},
        author = {7. Cui L, Wall PK, Leebens-Mack JH, Lindsay BG, Soltis DE, Doyle JJ, Soltis PS, Carlson JE, Arumuganathan K, Barakat A, Albert VA},
        title = {Widespread genome duplications throughout the history of flowering plants},
        year = {2006},
        volume = {16},
        number = {6},
        pages = {738-749},}
    </citation>
</citations>
|
|
329 </tool>
|