26
|
1 <tool id="plant_tribes_kaks_analysis" name="KaKsAnalysis" version="@WRAPPER_VERSION@.0">
|
25
|
2 <description>estimates paralogous and orthologous pairwise synonymous (Ks) and non-synonymous (Ka) substitution rates</description>
|
26
|
3 <macros>
|
|
4 <import>macros.xml</import>
|
|
5 </macros>
|
|
6 <expand macro="requirements_kaks_analysis" />
|
|
7 <expand macro="stdio" />
|
0
|
<!-- Cheetah template that assembles the KaKsAnalysis command line and then moves the
     result files out of the kaksAnalysis_dir directory onto the Galaxy output datasets. -->
<command>
    <![CDATA[
        ## Directory the result files are moved out of at the end of the command.
        #set output_dir = 'kaksAnalysis_dir'
        #set comparison = $comparison_cond.comparison

        ## Flatten the nested "advanced" conditionals into local variables.  In basic
        ## mode the selectors that are tested outside of the advanced-only section
        ## below are forced to 'no'.
        #if str($options_type.options_type_selector) == 'advanced':
            #set codeml_ctl_file_cond = $options_type.codeml_ctl_file_cond
            #set codeml_ctl_file_select = $codeml_ctl_file_cond.codeml_ctl_file_select
            #set fit_components_cond = $options_type.fit_components_cond
            #set fit_components = $fit_components_cond.fit_components
            #set recalibrate_cond = $options_type.recalibrate_cond
            #set recalibrate = $recalibrate_cond.recalibrate
            #set set_min_coverage_cond = $options_type.set_min_coverage_cond
            #set set_min_coverage = $set_min_coverage_cond.set_min_coverage
            #set set_lower_ks_limit_cond = $options_type.set_lower_ks_limit_cond
            #set set_lower_ks_limit = $set_lower_ks_limit_cond.set_lower_ks_limit
            #set set_upper_ks_limit_cond = $options_type.set_upper_ks_limit_cond
            #set set_upper_ks_limit = $set_upper_ks_limit_cond.set_upper_ks_limit
        #else:
            #set codeml_ctl_file_select = 'no'
            #set fit_components = 'no'
            #set set_lower_ks_limit = 'no'
            #set set_upper_ks_limit = 'no'
        #end if

        KaKsAnalysis
        ## Falls back to 4 threads when GALAXY_SLOTS is not set in the job environment.
        --num_threads \${GALAXY_SLOTS:-4}
        --coding_sequences_species_1 '$coding_sequences_species_1'
        --proteins_species_1 '$proteins_species_1'
        --comparison $comparison
        #if str($comparison) == 'orthologs':
            --coding_sequences_species_2 '$comparison_cond.coding_sequences_species_2'
            --proteins_species_2 '$comparison_cond.proteins_species_2'
        #end if
        #if str($options_type.options_type_selector) == 'advanced':
            #if str($set_min_coverage) == 'yes':
                --min_coverage $set_min_coverage_cond.min_coverage
            #end if
            #if str($recalibrate) == 'yes':
                --recalibration_rate $recalibrate_cond.recalibration_rate
            #end if
            #if str($codeml_ctl_file_select) == 'yes':
                --codeml_ctl_file '$codeml_ctl_file_cond.codeml_ctl_file'
                ## No else block needed here because the default codeml_ctl config
                ## will be used if the --codeml_ctl_file flag is missing.
                ## (These notes must be Cheetah "##" comments: a single "#" is emitted
                ## literally and would make the shell ignore the rest of the command.)
            #end if
            #if str($fit_components) == 'yes':
                --fit_components
                --num_of_components $fit_components_cond.num_of_components
            #end if
            #if str($set_lower_ks_limit) == 'yes':
                --min_ks $set_lower_ks_limit_cond.min_ks
            #end if
            #if str($set_upper_ks_limit) == 'yes':
                --max_ks $set_upper_ks_limit_cond.max_ks
            #end if
        #end if
        >/dev/null

        ## Move the result files onto the Galaxy output datasets.
        && mv $output_dir/species1.fna '$output_species1_fna'
        && mv $output_dir/species1.faa '$output_species1_faa'
        #if str($comparison) == 'paralogs':
            && mv $output_dir/species1.fna.blastn.paralogs '$output_species1_paralog'
        #else:
            && mv $output_dir/species2.faa '$output_species2_faa'
            && mv $output_dir/species2.fna '$output_species2_fna'
            && mv $output_dir/species1.fna.blastn.orthologs '$output_species1_ortholog'
            && mv $output_dir/species2.fna.blastn.orthologs '$output_species2_ortholog'
        #end if
        && mv $output_dir/*.rbhb '$output_rbhb'
        && mv $output_dir/*.kaks '$output_kaks'
        #if str($fit_components) == 'yes':
            && mv $output_dir/*.components '$output_components'
        #end if
    ]]>
</command>
|
|
<!-- Tool form: species1 inputs, the comparison mode (which adds species2 inputs for
     cross species runs), optional advanced settings, and the non-commercial-use check. -->
<inputs>
    <param name="coding_sequences_species_1" format="fasta" type="data" label="Coding sequences (CDS) fasta file for species1" />
    <param name="proteins_species_1" format="fasta" type="data" label="Amino acid (protein) sequences fasta file for species1" />
    <!-- Comparison mode: "orthologs" exposes the second species' inputs. -->
    <conditional name="comparison_cond">
        <param name="comparison" type="select" label="Select method for pairwise sequence comparison to determine homologous pairs" help="Cross species comparison requires selection of inputs for second species">
            <option value="paralogs" selected="true">Self species comparison</option>
            <option value="orthologs">Cross species comparison</option>
        </param>
        <when value="paralogs" />
        <when value="orthologs">
            <param name="coding_sequences_species_2" format="fasta" type="data" label="Coding sequences (CDS) fasta file for species2" />
            <param name="proteins_species_2" format="fasta" type="data" label="Amino acid (protein) sequences fasta file for species2" />
        </when>
    </conditional>
    <!-- Advanced settings: each one is a yes/no selector that reveals its value field. -->
    <conditional name="options_type">
        <param name="options_type_selector" type="select" label="Options Configuration">
            <option value="basic" selected="true">Basic</option>
            <option value="advanced">Advanced</option>
        </param>
        <when value="basic" />
        <when value="advanced">
            <conditional name="set_min_coverage_cond">
                <param name="set_min_coverage" type="select" label="Specify minimum sequence pairwise coverage length between homologous pairs?">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="min_coverage" type="float" value="0.5" min="0.3" max="1.0" label="Minimum sequence pairwise coverage length between homologous pairs" />
                </when>
            </conditional>
            <conditional name="recalibrate_cond">
                <param name="recalibrate" type="select" label="Specify evolutionary rate for recalibrating synonymous substitutions (ks) of species?">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="recalibration_rate" type="float" value="0.0" min="0.0" label="Evolutionary rate for recalibrating synonymous substitutions (ks) of species" />
                </when>
            </conditional>
            <conditional name="codeml_ctl_file_cond">
                <param name="codeml_ctl_file_select" type="select" label="Select PAML codeml control file?" help="Used for ML analysis of protein-coding DNA sequences using codon substitution models, select No to use the default control file">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="codeml_ctl_file" format="txt" type="data" label="PAML codeml control file" />
                </when>
            </conditional>
            <conditional name="fit_components_cond">
                <param name="fit_components" type="select" label="Fit a mixture model of multivariate normal components to synonymous (ks) distribution?" help="Used to identify significant duplication events in a genome">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="num_of_components" type="integer" value="1" min="1" label="Number of components to fit to synonymous substitutions (ks) distribution" />
                </when>
            </conditional>
            <conditional name="set_lower_ks_limit_cond">
                <param name="set_lower_ks_limit" type="select" label="Set lower limit of synonymous substitutions (ks)?" help="Reduces background noise from young paralogous pairs due to normal gene births and deaths in a genome">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="min_ks" type="float" value="0.0" min="0.0" label="Lower limit of synonymous substitutions (ks)" />
                </when>
            </conditional>
            <conditional name="set_upper_ks_limit_cond">
                <param name="set_upper_ks_limit" type="select" label="Set upper limit of synonymous substitutions (ks)?" help="Excludes likely ancient paralogous pairs">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="max_ks" type="float" value="0.0" min="0.0" label="Upper limit of synonymous substitutions (ks)" />
                </when>
            </conditional>
        </when>
    </conditional>
    <!-- Required due to the Emmix license -->
    <param name="non_commercial_use" label="I certify that I am not using this tool for commercial purposes." type="boolean" truevalue="NON_COMMERCIAL_USE" falsevalue="COMMERCIAL_USE" checked="False">
        <!-- The form is rejected with the message below unless the box is ticked. -->
        <validator type="expression" message="This tool is only available for non-commercial use.">value == True</validator>
    </param>
</inputs>
|
|
<!-- Output datasets.  The <filter> expressions decide which datasets are created for
     the selected comparison mode and advanced options. -->
<outputs>
    <!-- Species1 sequence files: always produced. -->
    <data format="fasta" name="output_species1_fna" label="KaKs analysis (coding sequences) on ${on_string}"/>
    <data format="fasta" name="output_species1_faa" label="KaKs analysis (amino acids) on ${on_string}"/>

    <!-- Species2 sequence files: cross species (orthologs) comparison only. -->
    <data format="fasta" name="output_species2_fna" label="KaKs analysis (coding sequences) on ${on_string}">
        <filter>comparison_cond['comparison'] == 'orthologs'</filter>
    </data>
    <data format="fasta" name="output_species2_faa" label="KaKs analysis (amino acids) on ${on_string}">
        <filter>comparison_cond['comparison'] == 'orthologs'</filter>
    </data>

    <!-- blastn result tables: one for a self comparison, two for a cross species comparison. -->
    <data format="tabular" name="output_species1_paralog" label="KaKs analysis (blastn results) on ${on_string}">
        <filter>comparison_cond['comparison'] == 'paralogs'</filter>
    </data>
    <data format="tabular" name="output_species1_ortholog" label="KaKs analysis (blastn results) on ${on_string}">
        <filter>comparison_cond['comparison'] == 'orthologs'</filter>
    </data>
    <data format="tabular" name="output_species2_ortholog" label="KaKs analysis (blastn results) on ${on_string}">
        <filter>comparison_cond['comparison'] == 'orthologs'</filter>
    </data>

    <!-- Tables produced for every run. -->
    <data format="tabular" name="output_rbhb" label="KaKs analysis (paralogous pairs) on ${on_string}"/>
    <data format="tabular" name="output_kaks" label="KaKs analysis on ${on_string}"/>

    <!-- Only created when component fitting is enabled in the advanced options. -->
    <data format="tabular" name="output_components" label="KaKs analysis (significant components in the ks distribution) on ${on_string}">
        <filter>options_type['options_type_selector'] == 'advanced' and options_type['fit_components_cond']['fit_components'] == 'yes'</filter>
    </data>
</outputs>
|
|
<!-- NOTE(review): the single <test> below is empty, so no functional test case (input parameters or expected outputs) is defined for this tool yet. -->
193 <tests>
|
|
194 <test>
|
|
195 </test>
|
|
196 </tests>
|
|
197 <help>
|
24
|
198 This tool is one of the PlantTribes collection of automated modular analysis pipelines that utilize objective classifications of
|
1
|
199 complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. This tool performs orthologous
|
|
200 or paralogous ks analyses of coding sequences and amino acid sequences.
|
0
|
201
|
|
202 -----
|
|
203
|
|
204 **Options**
|
|
205
|
15
|
206 * **Required**
|
0
|
207
|
15
|
208 - **Coding sequences (CDS) fasta file for species1** - Coding sequences (CDS) fasta file for species1.
|
|
209 - **Amino acid (protein) sequences fasta file for species1** - Amino acid (protein) sequences fasta file for species1.
|
|
210 - **Select method for pairwise sequence comparison to determine homologous pairs** - Pairwise sequence comparison to determine homologous pairs (cross species comparison requires selection of inputs for species2).
|
0
|
211
|
15
|
212 * **Optional**
|
0
|
213
|
12
|
214 - **Minimum sequence pairwise coverage length between homologous pairs** - Minimum sequence pairwise coverage length between homologous pairs (e.g., 0.5 results in 50% coverage). Legal values lie between 0.3 and 1.0.
|
|
215 - **Evolutionary rate for recalibrating synonymous substitutions (ks) of species** - (applies to paralogous ks analysis) Recalibrate synonymous substitutions (ks) of species using a predetermined evolutionary rate that can be determined from a species tree inferred from a collection of single copy genes from taxa of interest (Cui et al., 2006).
|
|
216 - **Select PAML codeml control file?** - Select PAML's codeml control file from your history. This file is used to perform ML analysis of protein-coding DNA sequences using codon substitution models. Selecting No uses the default file which does not include input (seqfile, treefile) and output (outfile) parameters of codeml.
|
|
217 - **Fit a mixture model of multivariate normal components to synonymous (ks) distribution?** - Fit a mixture model of multivariate normal components to synonymous (ks) distribution to identify significant duplication event(s) in a genome.
|
|
218 - **Number of components to fit to synonymous substitutions (ks) distribution** - Number of components to fit to the synonymous substitutions (ks) distribution.
|
|
219 - **Lower limit of synonymous substitutions (ks)** - Lower limit of synonymous substitutions (ks) - necessary if fitting components to the distribution to reduce background noise from young paralogous pairs due to normal gene births and deaths in a genome.
|
|
220 - **Upper limit of synonymous substitutions (ks)** - Upper limit of synonymous substitutions (ks) - necessary if fitting components to the distribution to exclude likely ancient paralogous pairs.
|
0
|
221
|
|
222 </help>
|
|
223 <citations>
|
26
|
224 <expand macro="citation1" />
|
12
|
225 <citation type="doi">10.1093/bioinformatics/btw412</citation>
|
0
|
226 <citation type="doi">10.1186/1471-2105-10-421</citation>
|
|
227 <citation type="doi">10.1093/molbev/msm088</citation>
|
|
228 <citation type="doi">10.18637/jss.v004.i02</citation>
|
|
229 </citations>
|
|
230 </tool>
|