<tool id="plant_tribes_gene_family_classifier" name="GeneFamilyClassifier" version="@WRAPPER_VERSION@.3.0">
    <description>classifies gene sequences into pre-computed orthologous gene family clusters</description>
    <!-- The @WRAPPER_VERSION@ token and the macros expanded in this file
         (requirements_gene_family_classifier, citation1) are expected to be
         provided by macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements_gene_family_classifier" />
<command detect_errors="exit_code"><![CDATA[
## ---------------------------------------------------------------------------
## Flatten the nested "advanced" conditionals into simple boolean flags so the
## command-line assembly below stays readable. In "basic" mode every optional
## feature is switched off.
## ---------------------------------------------------------------------------
#if str($options_type.options_type_selector) == 'advanced':
    #set specify_super_orthogroups_cond = $options_type.specify_super_orthogroups_cond
    #set specify_super_orthogroups = $specify_super_orthogroups_cond.specify_super_orthogroups
    #set create_orthogroup_cond = $options_type.create_orthogroup_cond
    #set create_orthogroup = $create_orthogroup_cond.create_orthogroup
    #set specify_single_copy_cond = $options_type.specify_single_copy_cond
    #set specify_single_copy = $specify_single_copy_cond.specify_single_copy
    ## Super orthogroups: remember the selected clustering distance measure.
    #if str($specify_super_orthogroups) == 'yes':
        #set specify_super_orthos = True
        #set super_orthogroups = $specify_super_orthogroups_cond.super_orthogroups
    #else:
        #set specify_super_orthos = False
    #end if
    ## Orthogroup fasta creation, optionally with corresponding coding sequences.
    #if str($create_orthogroup) == 'yes':
        #set create_ortho_sequences = True
        #set create_corresponding_coding_sequences_cond = $create_orthogroup_cond.create_corresponding_coding_sequences_cond
        #if str($create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences) == 'yes':
            #set create_corresponding_coding_sequences = True
        #else:
            #set create_corresponding_coding_sequences = False
        #end if
    #else:
        #set create_ortho_sequences = False
        #set create_corresponding_coding_sequences = False
    #end if
    ## Single/low-copy orthogroup selection.
    #if str($specify_single_copy) == 'yes':
        #set single_copy_orthogroup = True
        #set single_copy_cond = $specify_single_copy_cond.single_copy_cond
        #set single_copy = $single_copy_cond.single_copy
    #else:
        #set single_copy_orthogroup = False
    #end if
#else:
    #set single_copy_orthogroup = False
    #set create_ortho_sequences = False
    #set create_corresponding_coding_sequences = False
#end if

python '$__tool_directory__/gene_family_classifier.py'
--input '$input'
--scaffold '$scaffold.fields.path'
--method $method
--classifier $save_hmmscan_log_cond.classifier
--config_dir '$scaffold.fields.path'
--num_threads \${GALAXY_SLOTS:-4}

#if str($options_type.options_type_selector) == 'advanced':
    ## Use the $-prefixed form like every other flag test in this template.
    #if $specify_super_orthos:
        --super_orthogroups $super_orthogroups
    #end if
    #if $single_copy_orthogroup:
        #if str($single_copy) == 'custom':
            #set single_copy_custom_cond = $single_copy_cond.single_copy_custom_cond
            #set single_copy_custom = $single_copy_custom_cond.single_copy_custom
            ## 'no' means: use the scaffold's default single copy configuration.
            #if str($single_copy_custom) == 'no':
                --single_copy_custom default
            #else:
                --single_copy_custom '$single_copy_custom_cond.single_copy_custom_config'
            #end if
        #else:
            ## Both integers are optional; only pass the ones that were filled in.
            #if str($single_copy_cond.single_copy_taxa):
                --single_copy_taxa $single_copy_cond.single_copy_taxa
            #end if
            #if str($single_copy_cond.taxa_present):
                --taxa_present $single_copy_cond.taxa_present
            #end if
        #end if
    #end if
    #if $create_ortho_sequences:
        --orthogroup_fasta true
        #if $create_corresponding_coding_sequences:
            --coding_sequences '$create_corresponding_coding_sequences_cond.coding_sequences'
        #end if
    #end if
#end if

## The hmmscan log can only be requested when hmmscan is part of the classifier.
#if (str($save_hmmscan_log_cond.classifier) == 'hmmscan' or str($save_hmmscan_log_cond.classifier) == 'both') and str($save_hmmscan_log_cond.save_hmmscan_log) == 'yes':
    --save_hmmscan_log true
    --hmmscan_log '$output_hmmscan_log'
#end if
]]></command>
<inputs>
    <param name="input" format="fasta" type="data" label="Proteins fasta file"/>
    <!-- Scaffolds are listed from the plant_tribes_scaffolds data table. -->
    <param name="scaffold" type="select" label="Gene family scaffold">
        <options from_data_table="plant_tribes_scaffolds" />
        <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table."/>
    </param>
    <param name="method" type="select" label="Protein clustering method">
        <option value="gfam" selected="true">GFam</option>
        <option value="orthofinder">OrthoFinder</option>
        <option value="orthomcl">OrthoMCL</option>
    </param>
    <!-- The "save hmmscan log" choice is only offered when hmmscan is involved. -->
    <conditional name="save_hmmscan_log_cond">
        <param name="classifier" type="select" label="Protein classifier">
            <option value="blastp" selected="true">blastp</option>
            <option value="hmmscan">hmmscan</option>
            <option value="both">Both blastp and hmmscan</option>
        </param>
        <when value="blastp" />
        <when value="hmmscan">
            <param name="save_hmmscan_log" type="select" label="Save hmmscan log?">
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
        </when>
        <when value="both">
            <param name="save_hmmscan_log" type="select" label="Save hmmscan log?">
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
        </when>
    </conditional>
    <conditional name="options_type">
        <param name="options_type_selector" type="select" label="Options configuration">
            <option value="basic" selected="true">Basic</option>
            <option value="advanced">Advanced</option>
        </param>
        <when value="basic" />
        <when value="advanced">
            <!-- Super orthogroups -->
            <conditional name="specify_super_orthogroups_cond">
                <param name="specify_super_orthogroups" type="select" label="Super orthogroups configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <param name="super_orthogroups" type="select" label="Clustering distance measure">
                        <option value="min_evalue" selected="true">minimum e-value</option>
                        <option value="avg_evalue">average e-value</option>
                    </param>
                </when>
            </conditional>
            <!-- Single/low-copy orthogroup selection -->
            <conditional name="specify_single_copy_cond">
                <param name="specify_single_copy" type="select" label="Single copy orthogroups configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <conditional name="single_copy_cond">
                        <param name="single_copy" type="select" label="Selection criterion">
                            <option value="taxa" selected="true">Global selection</option>
                            <option value="custom">Custom selection</option>
                        </param>
                        <when value="custom">
                            <conditional name="single_copy_custom_cond">
                                <param name="single_copy_custom" type="select" label="Custom selection configuration">
                                    <option value="no" selected="true">No</option>
                                    <option value="yes">Yes</option>
                                </param>
                                <when value="no"/>
                                <when value="yes">
                                    <param name="single_copy_custom_config" format="txt" type="data" label="Custom selection file"/>
                                </when>
                            </conditional>
                        </when>
                        <when value="taxa">
                            <param name="single_copy_taxa" type="integer" optional="true" min="0" label="Minimum single copy taxa"/>
                            <param name="taxa_present" type="integer" optional="true" min="0" label="Minimum taxa present"/>
                        </when>
                    </conditional>
                </when>
            </conditional>
            <!-- Orthogroup fasta creation -->
            <conditional name="create_orthogroup_cond">
                <param name="create_orthogroup" type="select" label="Orthogroups fasta configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <conditional name="create_corresponding_coding_sequences_cond">
                        <param name="create_corresponding_coding_sequences" type="select" label="Orthogroups coding sequences">
                            <option value="no" selected="true">No</option>
                            <option value="yes">Yes</option>
                        </param>
                        <when value="no" />
                        <when value="yes">
                            <param name="coding_sequences" format="fasta" type="data" label="Coding sequences fasta file"/>
                        </when>
                    </conditional>
                </when>
            </conditional>
        </when>
    </conditional>
</inputs>
<outputs>
    <!-- Only produced when hmmscan is part of the classifier and the log was requested. -->
    <data name="output_hmmscan_log" format="txt" label="${tool.name} (hmmscan.log) on ${on_string}">
        <filter>save_hmmscan_log_cond['classifier'] in ['hmmscan', 'both'] and save_hmmscan_log_cond['save_hmmscan_log'] == 'yes'</filter>
    </data>
    <!-- Advanced mode, orthogroup fasta requested, single copy selection NOT requested. -->
    <collection name="output_orthos" type="list">
        <discover_datasets pattern="__name__" directory="output_orthos_dir" visible="false" ext="tabular" />
        <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['specify_single_copy_cond']['specify_single_copy'] == 'no'</filter>
    </collection>
    <!-- Advanced mode, orthogroup fasta requested AND single copy selection requested. -->
    <collection name="output_single_copy_fasta" type="list">
        <discover_datasets pattern="__name__" directory="output_single_copy_fasta_dir" visible="false" ext="fasta" />
        <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['specify_single_copy_cond']['specify_single_copy'] == 'yes'</filter>
    </collection>
</outputs>
<tests>
    <test>
        <param name="input" value="transcripts.cleaned.nr.pep" ftype="fasta"/>
        <param name="scaffold" value="22Gv1.1"/>
        <param name="method" value="orthomcl"/>
        <param name="classifier" value="both"/>
        <param name="options_type_selector" value="advanced"/>
        <param name="create_orthogroup" value="yes"/>
        <!-- NOTE(review): create_corresponding_coding_sequences is 'yes' but no
             coding_sequences dataset is supplied to this test; confirm the
             intended fixture or switch this parameter to 'no'. -->
        <param name="create_corresponding_coding_sequences" value="yes"/>
        <!-- The former <output name="output_ptorthocs"> assertion was dropped:
             the tool's <outputs> section defines no output with that name. -->
        <!-- The collection name must match the declared <collection name="output_orthos">. -->
        <output_collection name="output_orthos" type="list">
            <element name="proteins.blastp.22Gv1.1" file="proteins.blastp.22Gv1.1" ftype="tabular" compare="contains"/>
            <element name="proteins.blastp.22Gv1.1.bestOrthos" file="proteins.blastp.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
            <element name="proteins.both.22Gv1.1.bestOrthos" file="proteins.both.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
            <element name="proteins.both.22Gv1.1.bestOrthos.summary" file="proteins.both.22Gv1.1.bestOrthos.summary" ftype="tabular" compare="contains"/>
            <element name="proteins.hmmscan.22Gv1.1" file="proteins.hmmscan.22Gv1.1" ftype="tabular" compare="contains"/>
            <element name="proteins.hmmscan.22Gv1.1.bestOrthos" file="proteins.hmmscan.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
        </output_collection>
    </test>
</tests>
<help>
This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
analyses of genome-scale gene families and transcriptomes. This tool classifies gene coding sequences either produced by
the AssemblyPostProcessor tool or from an external source into pre-computed orthologous gene family clusters (orthogroups)
of a PlantTribes scaffold. Classified sequences are then assigned the corresponding orthogroups’ metadata, which includes
gene counts of backbone taxa, super clusters (super orthogroups) at multiple stringencies, and functional annotations from
sources such as Gene Ontology (GO), InterPro protein domains, TAIR, UniProtKB/TrEMBL, and UniProtKB/Swiss-Prot. Additionally,
sequences belonging to single/low-copy gene families that are mainly utilized in species tree inference can be determined.

-----

**Required options**

* **Proteins fasta file** - proteins fasta file either produced by the AssemblyPostProcessor tool or an external source selected from your history.
* **Gene family scaffold** - one of the PlantTribes gene family scaffolds [2-4] installed into Galaxy by the PlantTribes Scaffolds Download Data Manager tool.
* **Protein clustering method** - gene family scaffold protein clustering method as described in the AssemblyPostProcessor tool.
* **Protein classifier** - classifier used to assign protein sequences to the orthogroups of the specified scaffold. PlantTribes implements three classification approaches: blastp (faster)[5], hmmscan (slower but more sensitive assignment of divergent homologs)[6], and both blastp and hmmscan (disagreements resolved in favor of hmmscan; more exhaustive).

**Other options**

* **Super orthogroups configuration** - select ‘Yes’ to enable super orthogroups configuration options. Super orthogroups[7] are constructed through a second iteration of MCL clustering to connect distant, but potentially related orthogroup clusters.

* **Clustering distance measure** - distance measure used in merging orthogroup clusters into super orthogroup clusters. PlantTribes pre-computed super orthogroups are based on the minimum and average blastp e-value between all pairs of scaffold orthogroups used as the input matrix for the MCL clustering algorithm[8].

* **Single copy orthogroups configuration** - select ‘Yes’ to enable single/low-copy orthogroups selection configuration options.

* **Selection criterion** - single/low-copy orthogroups selection criterion. PlantTribes provides custom and global selection criteria for selecting user-defined single/low-copy scaffold orthogroups.

* **Global selection configuration** - the upper limit values of the following two parameters vary depending on the selected gene family scaffold, and the tool will produce an error if the value exceeds the number of species in the circumscribed scaffold.

* **Minimum single copy taxa** - minimum number of taxa with single copy genes in the orthogroup.
* **Minimum taxa present** - minimum number of taxa present in the orthogroup.

* **Custom selection configuration** - select ‘Yes’ to enable selection of a single copy configuration file. Scaffold configuration templates (.singleCopy.config) showing how to customize single/low-copy orthogroups selection can be found in the scaffold data installed into Galaxy via the PlantTribes Scaffolds Download Data Manager tool, and are also available at the PlantTribes GitHub `repository`_. Single/low-copy settings shown in these templates are used as defaults if ‘No’ is selected.

* **Custom selection file** - select a single/low-copy customized configuration file from your history.

* **Orthogroups fasta configuration** - select ‘Yes’ to create proteins orthogroups fasta files for the classified sequences.

* **Orthogroups coding sequences** - select ‘Yes’ to create corresponding coding sequences orthogroup fasta files for the classified protein sequences. Requires a coding sequences fasta file corresponding to the proteins fasta file to be selected from your history.

* **Coding sequences fasta file** - select the coding sequences fasta file corresponding to the proteins fasta file from your history.

.. _repository: https://github.com/dePamphilis/PlantTribes/tree/master/config

</help>
<!-- The numeric prefixes in the author fields (2., 3., ...) correspond to the
     bracketed reference numbers used in the help text. -->
<citations>
    <expand macro="citation1" />
    <citation type="bibtex">
        @article{Sasidharan2012,
        journal = {Nucleic Acids Research},
        author = {2. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},
        title = {GFam: a platform for automatic annotation of gene families},
        year = {2012},
        pages = {gks631},}
    </citation>
    <citation type="bibtex">
        @article{Li2003,
        journal = {Genome Research},
        author = {3. Li L, Stoeckert CJ, Roos DS},
        title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},
        year = {2003},
        volume = {13},
        number = {9},
        pages = {2178-2189},}
    </citation>
    <citation type="bibtex">
        @article{Emms2015,
        journal = {Genome Biology},
        author = {4. Emms DM, Kelly S},
        title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},
        year = {2015},
        volume = {16},
        number = {1},
        pages = {157},}
    </citation>
    <citation type="bibtex">
        @article{Altschul1990,
        journal = {Journal of molecular biology},
        author = {5. Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ},
        title = {Basic local alignment search tool},
        year = {1990},
        volume = {215},
        number = {3},
        pages = {403-410},}
    </citation>
    <citation type="bibtex">
        @article{Eddy2009,
        journal = {Genome Inform},
        author = {6. Eddy SR},
        title = {A new generation of homology search tools based on probabilistic inference},
        year = {2009},
        volume = {23},
        number = {1},
        pages = {205-211},}
    </citation>
    <citation type="bibtex">
        @article{Wall2008,
        journal = {Nucleic Acids Research},
        author = {7. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS},
        title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
        year = {2008},
        volume = {36},
        number = {suppl 1},
        pages = {D970-D976},}
    </citation>
    <citation type="bibtex">
        @article{Enright2002,
        journal = {Nucleic acids research},
        author = {8. Enright AJ, Van Dongen S, Ouzounis CA},
        title = {An efficient algorithm for large-scale detection of protein families},
        year = {2002},
        volume = {30},
        number = {7},
        pages = {1575-1584},}
    </citation>
</citations>
</tool>