<tool id="plant_tribes_kaks_analysis" name="Perform orthologous or paralogous ks analyses" version="0.7.0">
    <description>of coding sequences and amino acid sequences</description>
    <requirements>
        <requirement type="package" version="0.7">plant_tribes_kaks_analysis</requirement>
    </requirements>
    <stdio>
        <!-- Anything other than zero is an error -->
        <exit_code range="1:" />
        <exit_code range=":-1" />
        <!-- In case the return code has not been set properly check stderr too -->
        <regex match="Error:" />
        <regex match="Exception:" />
    </stdio>
    <command>
        <![CDATA[
            ## Directory the KaKsAnalysis outputs are moved out of below.
            #set output_dir = 'kaksAnalysis_dir'
            #set comparison = $comparison_cond.comparison
            ## Defaults for the basic configuration, so the checks further down
            ## never reference an undefined template variable.
            #set codeml_ctl_file_select = 'no'
            #set fit_components = 'no'
            #if str($options_type.options_type_selector) == 'advanced':
                #set codeml_ctl_file_cond = $options_type.codeml_ctl_file_cond
                #set codeml_ctl_file_select = $codeml_ctl_file_cond.codeml_ctl_file_select
                #set fit_components_cond = $options_type.fit_components_cond
                #set fit_components = $fit_components_cond.fit_components
            #end if
            KaKsAnalysis
            --config_dir '$scaffold.fields.path'
            --num_threads \${GALAXY_SLOTS:-4}
            --coding_sequences_species_1 '$coding_sequences_species_1'
            --proteins_species_1 '$proteins_species_1'
            --comparison $comparison
            #if str($comparison) == 'orthologs':
                --coding_sequences_species_2 '$comparison_cond.coding_sequences_species_2'
                --proteins_species_2 '$comparison_cond.proteins_species_2'
            #end if
            #if str($options_type.options_type_selector) == 'advanced':
                ## These parameters live inside the options_type conditional,
                ## so they must be referenced through it.
                --min_coverage $options_type.min_coverage
                --recalibration_rate $options_type.recalibration_rate
                #if str($codeml_ctl_file_select) == 'yes':
                    --codeml_ctl_file '$codeml_ctl_file_cond.codeml_ctl_file'
                    ## No else block needed here because the default codeml_ctl config
                    ## will be used if the --codeml_ctl_file flag is missing.
                #end if
                #if str($fit_components) == 'yes':
                    --num_of_components $fit_components_cond.num_of_components
                    --min_ks $fit_components_cond.min_ks
                    --max_ks $fit_components_cond.max_ks
                #end if
            #end if
            && mv $output_dir/species1.fna '$output_species1_fna'
            && mv $output_dir/species1.faa '$output_species1_faa'
            ## The paralogs blastn output only exists (and its dataset is only
            ## created, see the output filter) for the self species comparison.
            #if str($comparison) == 'paralogs':
                && mv $output_dir/species1.fna.blastn.paralogs '$output_species1_paralog'
            #end if
            #if str($comparison) == 'orthologs':
                && mv $output_dir/species2.faa '$output_species2_faa'
                && mv $output_dir/species2.fna '$output_species2_fna'
                && mv $output_dir/species1.fna.blastn.orthologs '$output_species1_ortholog'
                && mv $output_dir/species2.fna.blastn.orthologs '$output_species2_ortholog'
            #end if
            && mv $output_dir/*.rbhb '$output_rbhb'
            && mv $output_dir/*.kaks '$output_kaks'
            #if str($fit_components) == 'yes':
                && mv $output_dir/*.components '$output_components'
            #end if
        ]]>
    </command>
    <inputs>
        <param name="coding_sequences_species_1" format="fasta" type="data" label="Coding sequences (CDS) fasta file for the species" />
        <param name="proteins_species_1" format="fasta" type="data" label="Amino acids (proteins) sequences fasta file for the species" />
        <conditional name="comparison_cond">
            <param name="comparison" type="select" label="Select method for pairwise sequence comparison to determine homologous pairs" help="Cross species comparison requires selection of inputs for second species">
                <option value="paralogs" selected="true">Self species comparison</option>
                <option value="orthologs">Cross species comparison</option>
            </param>
            <when value="paralogs" />
            <when value="orthologs">
                <param name="coding_sequences_species_2" format="fasta" type="data" label="Coding sequences (CDS) fasta file for the second species" />
                <param name="proteins_species_2" format="fasta" type="data" label="Amino acids (proteins) sequences fasta file for the second species" />
            </when>
        </conditional>
        <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold" help="Selection specifies default configuration files">
            <options from_data_table="plant_tribes_scaffolds" />
            <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table."/>
        </param>
        <conditional name="options_type">
            <param name="options_type_selector" type="select" label="Options Configuration">
                <option value="basic" selected="true">Basic</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="basic" />
            <when value="advanced">
                <param name="min_coverage" type="float" value="0.5" min="0.3" max="1.0" label="Minimum sequence pairwise coverage length between homologous pairs" />
                <param name="recalibration_rate" type="float" value="0.0" min="0.0" label="Predetermined evolutionary rate for recalibrating synonymous substitutions (ks) of species" />
                <conditional name="codeml_ctl_file_cond">
                    <param name="codeml_ctl_file_select" type="select" label="Select PAML codeml control file?" help="Used for ML analysis of protein-coding DNA sequences using codon substitution models, select No to use the default control file">
                        <option value="no" selected="true">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="no" />
                    <when value="yes">
                        <param name="codeml_ctl_file" format="txt" type="data" label="PAML codeml control file" />
                    </when>
                </conditional>
                <conditional name="fit_components_cond">
                    <param name="fit_components" type="select" label="Fit a mixture model of multivariate normal components to synonymous (ks) distribution?" help="Used to identify significant duplication events in a genome">
                        <option value="no" selected="true">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="no" />
                    <when value="yes">
                        <param name="num_of_components" type="integer" value="0" min="0" label="Number of components to fit to synonymous substitutions (ks) distribution" />
                        <param name="min_ks" type="float" value="0.0" min="0.0" label="Lower limit of synonymous substitutions (ks)" help="Reduces background noise from young paralogous pairs due to normal gene births and deaths in a genome" />
                        <param name="max_ks" type="float" value="0.0" min="0.0" label="Upper limit of synonymous substitutions (ks)" help="Excludes likely ancient paralogous pairs" />
                    </when>
                </conditional>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="output_species1_fna" format="fasta" label="KaKs analysis (coding sequences) on ${on_string}" />
        <data name="output_species1_faa" format="fasta" label="KaKs analysis (amino acids) on ${on_string}" />
        <data name="output_species2_fna" format="fasta" label="KaKs analysis (coding sequences) on ${on_string}">
            <filter>comparison_cond['comparison'] == 'orthologs'</filter>
        </data>
        <data name="output_species2_faa" format="fasta" label="KaKs analysis (amino acids) on ${on_string}">
            <filter>comparison_cond['comparison'] == 'orthologs'</filter>
        </data>
        <data name="output_species1_paralog" format="tabular" label="KaKs analysis (blastn results) on ${on_string}">
            <filter>comparison_cond['comparison'] == 'paralogs'</filter>
        </data>
        <data name="output_species1_ortholog" format="tabular" label="KaKs analysis (blastn results) on ${on_string}">
            <filter>comparison_cond['comparison'] == 'orthologs'</filter>
        </data>
        <data name="output_species2_ortholog" format="tabular" label="KaKs analysis (blastn results) on ${on_string}">
            <filter>comparison_cond['comparison'] == 'orthologs'</filter>
        </data>
        <data name="output_rbhb" format="tabular" label="KaKs analysis (paralogous pairs) on ${on_string}" />
        <data name="output_kaks" format="tabular" label="KaKs analysis on ${on_string}" />
        <data name="output_components" format="tabular" label="KaKs analysis (significant components in the ks distribution) on ${on_string}">
            <!-- Only produced when the advanced options request fitting of mixture model components. -->
            <filter>options_type['options_type_selector'] == 'advanced' and options_type['fit_components_cond']['fit_components'] == 'yes'</filter>
        </data>
    </outputs>
    <tests>
        <!-- TODO: add functional test inputs and expected outputs; this test is currently empty. -->
        <test>
        </test>
    </tests>
    <help>
This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of
complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies.  This tool performs orthologous
or paralogous ks analyses of coding sequences and amino acid sequences.

-----

**Options**

 * **Required options**

   - **Coding sequences (CDS) fasta file for the species** - Coding sequences (CDS) fasta file for the first species.
   - **Amino acids (proteins) sequences fasta file for the species** - Amino acids (proteins) sequences fasta file for the first species.
   - **Select method for pairwise sequence comparison to determine homologous pairs** - Either self species comparison (paralogs) or cross species comparison (orthologs).  Cross species comparison requires selection of inputs for the second species.
   - **Coding sequences (CDS) fasta file for the second species** - Required only for cross species comparison.
   - **Amino acids (proteins) sequences fasta file for the second species** - Required only for cross species comparison.
   - **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data.  Selection specifies default configuration files.

 * **Advanced options**

   - **Minimum sequence pairwise coverage length between homologous pairs** - Value between 0.3 and 1.0 (default 0.5).
   - **Predetermined evolutionary rate for recalibrating synonymous substitutions (ks) of species** - Value of 0.0 or greater (default 0.0).
   - **Select PAML codeml control file?** - Used for ML analysis of protein-coding DNA sequences using codon substitution models.  Select 'No' to use the default control file.
   - **Fit a mixture model of multivariate normal components to synonymous (ks) distribution?** - Used to identify significant duplication events in a genome.
   - **Number of components to fit to synonymous substitutions (ks) distribution** - Integer value of 0 or greater (default 0).
   - **Lower limit of synonymous substitutions (ks)** - Reduces background noise from young paralogous pairs due to normal gene births and deaths in a genome.
   - **Upper limit of synonymous substitutions (ks)** - Excludes likely ancient paralogous pairs.

    </help>
    <citations>
        <citation type="bibtex">
            @unpublished{None,
            author = {Eric Wafula},
            title = {None},
            year = {None},
            url = {https://github.com/dePamphilis/PlantTribes}
            }
        </citation>
        <citation type="doi">10.1186/1471-2105-10-421</citation>
        <citation type="doi">10.1093/molbev/msm088</citation>
        <citation type="doi">10.18637/jss.v004.i02</citation>
    </citations>
</tool>