118
|
<!-- Galaxy tool definition for the PlantTribes GeneFamilyClassifier. -->
<tool id="plant_tribes_gene_family_classifier"
      name="GeneFamilyClassifier"
      version="0.4.0">
<description>classifies gene sequences into pre-computed orthologous plant gene family clusters</description>
<!-- Software package that has to be resolved before the command can run. -->
<requirements>
    <requirement type="package" version="0.4">plant_tribes_gene_family_classifier</requirement>
</requirements>
|
|
<stdio>
    <!-- Every non-zero exit status, negative or positive, marks the job as failed. -->
    <exit_code range=":-1" />
    <exit_code range="1:" />
    <!-- The exit status may not have been set properly, so the job output is also scanned for these markers. -->
    <regex match="Error:" />
    <regex match="Exception:" />
</stdio>
|
|
<command>
    <![CDATA[
        ## Stage 1: flatten the nested advanced-option conditionals into plain
        ## flags so the argument list below can be assembled with simple tests.
        #if str($options_type.options_type_selector) == 'advanced':
            #set specify_super_orthogroups_cond = $options_type.specify_super_orthogroups_cond
            #set specify_super_orthogroups = $specify_super_orthogroups_cond.specify_super_orthogroups
            #set create_orthogroup_cond = $options_type.create_orthogroup_cond
            #set create_orthogroup = $create_orthogroup_cond.create_orthogroup
            #set specify_single_copy_cond = $options_type.specify_single_copy_cond
            #set specify_single_copy = $specify_single_copy_cond.specify_single_copy
            #if str($specify_super_orthogroups) == 'yes':
                #set specify_super_orthos = True
                #set super_orthogroups = $specify_super_orthogroups_cond.super_orthogroups
            #else:
                #set specify_super_orthos = False
            #end if
            #if str($create_orthogroup) == 'yes':
                #set create_ortho_sequences = True
                #set create_corresponding_coding_sequences_cond = $create_orthogroup_cond.create_corresponding_coding_sequences_cond
                #if str($create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences) == 'yes':
                    #set create_corresponding_coding_sequences = True
                #else:
                    #set create_corresponding_coding_sequences = False
                #end if
            #else:
                #set create_ortho_sequences = False
                #set create_corresponding_coding_sequences = False
            #end if
            #if str($specify_single_copy) == 'yes':
                #set single_copy_orthogroup = True
                #set single_copy_cond = $specify_single_copy_cond.single_copy_cond
                #set single_copy = $single_copy_cond.single_copy
            #else:
                #set single_copy_orthogroup = False
            #end if
        #else:
            ## Basic mode: every optional feature is switched off. All four flags
            ## are defined here so that later tests never hit an unset name.
            #set specify_super_orthos = False
            #set single_copy_orthogroup = False
            #set create_ortho_sequences = False
            #set create_corresponding_coding_sequences = False
        #end if

        ## Stage 2: mandatory arguments. The script path is quoted so that a
        ## tool directory containing spaces does not split the argument.
        python '$__tool_directory__/gene_family_classifier.py'
        --input '$input'
        --scaffold '$scaffold.fields.path'
        --method $method
        --classifier $save_hmmscan_log_cond.classifier
        --config_dir '$scaffold.fields.path'
        --num_threads \${GALAXY_SLOTS:-4}

        ## Stage 3: arguments that only exist in advanced mode.
        #if str($options_type.options_type_selector) == 'advanced':
            #if $specify_super_orthos:
                --super_orthogroups $super_orthogroups
            #end if
            #if $single_copy_orthogroup:
                #if str($single_copy) == 'custom':
                    #set single_copy_custom_cond = $single_copy_cond.single_copy_custom_cond
                    #set single_copy_custom = $single_copy_custom_cond.single_copy_custom
                    #if str($single_copy_custom) == 'no':
                        --single_copy_custom 'default'
                    #else:
                        --single_copy_custom '$single_copy_custom_cond.single_copy_custom_config'
                    #end if
                #else:
                    --single_copy_taxa $single_copy_cond.single_copy_taxa
                    --taxa_present $single_copy_cond.taxa_present
                #end if
            #end if
            #if $create_ortho_sequences:
                --orthogroup_fasta "true"
                #if $create_corresponding_coding_sequences:
                    --coding_sequences '$create_corresponding_coding_sequences_cond.coding_sequences'
                #end if
            #end if
        #end if

        ## Stage 4: the hmmscan log is requested only when an hmmscan-based
        ## classifier is selected AND the user asked to keep the log. The
        ## membership test comes first so that save_hmmscan_log, which does not
        ## exist for the blastp classifier, is never evaluated in that case.
        ## This mirrors the filter on the hmmscan_log output.
        #if str($save_hmmscan_log_cond.classifier) in ['hmmscan', 'both'] and str($save_hmmscan_log_cond.save_hmmscan_log) == 'yes':
            --save_hmmscan_log
            --hmmscan_log '$hmmscan_log'
        #end if

        ## Stage 5: output datasets, matching the filters declared in the outputs section.
        #if $create_ortho_sequences:
            #if $create_corresponding_coding_sequences:
                --output_ptorthocs '$output_ptorthocs'
                --output_ptorthocs_dir '$output_ptorthocs.files_path'
            #else:
                --output_ptortho '$output_ptortho'
                --output_ptortho_dir '$output_ptortho.files_path'
            #end if
        #end if
        #if $single_copy_orthogroup:
            #if $create_ortho_sequences:
                --output_ptsco '$output_ptsco'
                --output_ptsco_dir '$output_ptsco.files_path'
            #end if
        #end if
    ]]>
</command>
|
|
<inputs>
    <!-- Required inputs -->
    <param name="input" type="data" format="fasta" label="Proteins fasta file" />
    <param name="scaffold" type="select" label="Gene family scaffold">
        <options from_data_table="plant_tribes_scaffolds" />
        <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
    </param>
    <param name="method" type="select" label="Protein clustering method">
        <option value="gfam" selected="true">GFam</option>
        <option value="orthofinder">OrthoFinder</option>
        <option value="orthomcl">OrthoMCL</option>
    </param>

    <!-- Classifier choice; the hmmscan-based classifiers additionally offer to keep the hmmscan log. -->
    <conditional name="save_hmmscan_log_cond">
        <param name="classifier" type="select" label="Protein classifier">
            <option value="blastp" selected="true">blastp</option>
            <option value="hmmscan">HMMScan</option>
            <option value="both">Both blastp and HMMScan</option>
        </param>
        <when value="blastp" />
        <when value="hmmscan">
            <param name="save_hmmscan_log" type="select" label="Save hmmscan log?">
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
        </when>
        <when value="both">
            <param name="save_hmmscan_log" type="select" label="Save hmmscan log?">
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
        </when>
    </conditional>

    <!-- Optional settings, shown only in advanced mode. -->
    <conditional name="options_type">
        <param name="options_type_selector" type="select" label="Options configuration">
            <option value="basic" selected="true">Basic</option>
            <option value="advanced">Advanced</option>
        </param>
        <when value="basic" />
        <when value="advanced">
            <!-- Super orthogroups -->
            <conditional name="specify_super_orthogroups_cond">
                <param name="specify_super_orthogroups" type="select" label="Super orthogroups configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="super_orthogroups" type="select" label="Clustering distance measure">
                        <option value="min_evalue" selected="true">blastp e-value</option>
                    </param>
                </when>
            </conditional>

            <!-- Single copy orthogroups: either a custom configuration or global taxa thresholds. -->
            <conditional name="specify_single_copy_cond">
                <param name="specify_single_copy" type="select" label="Single copy orthogroups configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <conditional name="single_copy_cond">
                        <param name="single_copy" type="select" label="Selection criterion">
                            <option value="custom" selected="true">Custom selection</option>
                            <option value="taxa">Global selection</option>
                        </param>
                        <when value="custom">
                            <conditional name="single_copy_custom_cond">
                                <param name="single_copy_custom" type="select" label="Custom selection configuration">
                                    <option value="no" selected="true">No</option>
                                    <option value="yes">Yes</option>
                                </param>
                                <when value="no" />
                                <when value="yes">
                                    <param name="single_copy_custom_config" type="data" format="txt" label="Custom selection file" />
                                </when>
                            </conditional>
                        </when>
                        <when value="taxa">
                            <param name="single_copy_taxa" type="integer" value="20" label="Minimum single copy taxa" />
                            <param name="taxa_present" type="integer" value="21" label="Minimum taxa present" />
                        </when>
                    </conditional>
                </when>
            </conditional>

            <!-- Orthogroup fasta files, optionally with the matching coding sequences. -->
            <conditional name="create_orthogroup_cond">
                <param name="create_orthogroup" type="select" label="Orthogroups fasta configuration">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <conditional name="create_corresponding_coding_sequences_cond">
                        <param name="create_corresponding_coding_sequences" type="select" label="Orthogroups coding sequences">
                            <option value="no" selected="true">No</option>
                            <option value="yes">Yes</option>
                        </param>
                        <when value="no" />
                        <when value="yes">
                            <param name="coding_sequences" type="data" format="fasta" label="Coding sequences fasta file" />
                        </when>
                    </conditional>
                </when>
            </conditional>
        </when>
    </conditional>
</inputs>
|
|
<outputs>
    <!-- hmmscan log: only for an hmmscan-based classifier when the user asked to save it. -->
    <data name="hmmscan_log" format="txt" label="Protein classification hmmscan.log on ${on_string}">
        <filter>save_hmmscan_log_cond['classifier'] in ['hmmscan', 'both'] and save_hmmscan_log_cond['save_hmmscan_log'] == 'yes'</filter>
    </data>

    <!-- Orthogroup fasta output without coding sequences. -->
    <data name="output_ptortho" format="ptortho" label="Gene family clusters on ${on_string}">
        <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['create_orthogroup_cond']['create_corresponding_coding_sequences_cond']['create_corresponding_coding_sequences'] == 'no'</filter>
    </data>

    <!-- Orthogroup fasta output with the corresponding coding sequences. -->
    <data name="output_ptorthocs" format="ptorthocs" label="Gene family clusters and corresponding coding sequences on ${on_string}">
        <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['create_orthogroup_cond']['create_corresponding_coding_sequences_cond']['create_corresponding_coding_sequences'] == 'yes'</filter>
    </data>

    <!-- Single copy orthogroups; the datatype follows the coding sequences choice. -->
    <data name="output_ptsco" format="tabular" label="Single copy orthogroups on ${on_string}">
        <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['specify_single_copy_cond']['specify_single_copy'] == 'yes'</filter>
        <change_format>
            <when input="options_type.create_orthogroup_cond.create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences" value="no" format="ptortho" />
            <when input="options_type.create_orthogroup_cond.create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences" value="yes" format="ptorthocs" />
        </change_format>
    </data>

    <!-- Files discovered in geneFamilyClassification_dir are collected into a list. -->
    <collection name="orthos" type="list">
        <discover_datasets pattern="__name__" directory="geneFamilyClassification_dir" visible="false" ext="tabular" />
    </collection>
</outputs>
|
|
<tests>
    <!--
        Basic-mode run with the blastp classifier; checks the three files
        collected into the "orthos" list. Only parameters declared in the
        inputs section are set here: the former "dereplicate" and "min_length"
        entries did not correspond to any input of this tool and were removed.
    -->
    <test>
        <param name="input" value="transcripts.cleaned.nr.pep" ftype="fasta" />
        <param name="scaffold" value="22Gv1.1" />
        <param name="method" value="orthomcl" />
        <param name="classifier" value="blastp" />
        <output_collection name="orthos" type="list">
            <element name="proteins.blastp.22Gv1.1" file="proteins.blastp.22Gv1.1" ftype="tabular" />
            <element name="proteins.blastp.22Gv1.1.bestOrthos" file="proteins.blastp.22Gv1.1.bestOrthos" ftype="tabular" />
            <element name="proteins.blastp.22Gv1.1.bestOrthos.summary" file="proteins.blastp.22Gv1.1.bestOrthos.summary" ftype="tabular" />
        </output_collection>
    </test>
</tests>
|
|
<help>
This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
analyses of genome-scale gene families and transcriptomes. This tool classifies gene coding sequences either produced by
the AssemblyPostProcessor tool or from an external source into pre-computed orthologous gene family clusters (orthogroups)
of a PlantTribes scaffold. Classified sequences are then assigned the corresponding orthogroups’ metadata, which includes
gene counts of backbone taxa, super clusters (super orthogroups) at multiple stringencies, and functional annotations from
sources such as Gene Ontology (GO), InterPro protein domains, and UniProt KB/Swiss-Prot. Additionally, sequences belonging
to single/low-copy gene families that are mainly utilized in species tree inference can be determined.

-----

**Required options**

* **Proteins fasta file** - proteins fasta file either produced by the AssemblyPostProcessor tool or an external source selected from your history.
* **Gene family scaffold** - one of the PlantTribes gene family scaffolds [2-4] installed into Galaxy by the PlantTribes Scaffold Data Manager tool.
* **Protein clustering method** - gene family scaffold protein clustering method as described in the AssemblyPostProcessor tool.
* **Protein classifier** - classifier used to assign protein sequences to the orthogroups of the specified scaffold. PlantTribes implements three classification approaches: blastp (faster)[5], hmmscan (slower but more sensitive to remote homologs)[6], and both blastp and hmmscan (more exhaustive).

**Other options**

* **Super orthogroups configuration** - select ‘Yes’ to enable super orthogroups configuration options. Super orthogroups are constructed through a second iteration of MCL clustering to connect distant, but potentially related orthogroup clusters.

  * **Clustering distance measure** - distance measure used in merging orthogroup clusters into super orthogroup clusters. PlantTribes pre-computed super orthogroups are based on the minimum and average blastp e-value between all pairs of scaffold orthogroups used as the input matrix for the MCL clustering algorithm[7].

* **Single copy orthogroups configuration** - select ‘Yes’ to enable single/low-copy orthogroups selection configuration options.

  * **Selection criterion** - single/low-copy orthogroups selection criterion. PlantTribes provides custom and global selection criteria for selecting user defined single/low-copy scaffold orthogroups.

    * **Custom selection configuration** - select ‘Yes’ to enable selection of a single copy configuration file. Scaffold configuration templates (.singleCopy.config) showing how to customize single/low-copy orthogroups selection can be found in the scaffold data installed into Galaxy via the PlantTribes Scaffolds Download Data Manager tool, and are also available at the PlantTribes GitHub repository (https://github.com/dePamphilis/PlantTribes/config ). Single/low-copy settings shown in these templates are used as defaults if ‘No’ is selected.

      * **Custom selection file** - select a single/low-copy customized configuration file from your history.

    * **Global selection configuration** - Used with "Global selection" configuration only.

      * **Minimum single copy taxa** - Minimum number of taxa with single copy genes in the orthogroup.
      * **Minimum taxa present** - Minimum number of taxa present in the orthogroup.

* **Orthogroups fasta configuration** - select ‘Yes’ to create proteins orthogroups fasta files for the classified sequences.

  * **Orthogroups coding sequences** - select ‘Yes’ to create corresponding coding sequences orthogroups fasta files for the classified protein sequences. Requires a coding sequences fasta file corresponding to the proteins fasta file to be selected from your history.

    * **Coding sequences fasta file** - select coding sequences fasta file corresponding to the proteins fasta file from your history.

</help>
|
|
<!--
    References cited by number in the help text. The numeric prefix in each
    author field is kept because the help refers to these entries as [2-4],
    [5], [6] and [7]. Every BibTeX field is comma-terminated and every entry
    has its own citation key.
-->
<citations>
    <citation type="bibtex">
        @misc{Wafula,
            journal = {None},
            author = {1. Wafula EK},
            title = {Manuscript in preparation},
            year = {None},
            url = {https://github.com/dePamphilis/PlantTribes},}
    </citation>
    <citation type="bibtex">
        @article{Sasidharan2012,
            journal = {Nucleic Acids Research},
            author = {2. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},
            title = {GFam: a platform for automatic annotation of gene families},
            year = {2012},
            pages = {gks631},}
    </citation>
    <citation type="bibtex">
        @article{Li2003,
            journal = {Genome Research},
            author = {3. Li L, Stoeckert CJ, Roos DS},
            title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},
            year = {2003},
            volume = {13},
            number = {9},
            pages = {2178-2189},}
    </citation>
    <citation type="bibtex">
        @article{Emms2015,
            journal = {Genome Biology},
            author = {4. Emms DM, Kelly S},
            title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},
            year = {2015},
            volume = {16},
            number = {1},
            pages = {157},}
    </citation>
    <citation type="bibtex">
        @article{Altschul1990,
            journal = {Journal of molecular biology},
            author = {5. Altschul SF, Gish W, Miller W, Myers EW, Lipman DJ},
            title = {Basic local alignment search tool},
            year = {1990},
            volume = {215},
            number = {3},
            pages = {403-410},}
    </citation>
    <citation type="bibtex">
        @article{Eddy2009,
            journal = {Genome Inform},
            author = {6. Eddy SR},
            title = {A new generation of homology search tools based on probabilistic inference},
            year = {2009},
            volume = {23},
            number = {1},
            pages = {205-211},}
    </citation>
    <citation type="bibtex">
        @article{Enright2002,
            journal = {Nucleic acids research},
            author = {7. Enright AJ, Van Dongen S, Ouzounis CA},
            title = {An efficient algorithm for large-scale detection of protein families},
            year = {2002},
            volume = {30},
            number = {7},
            pages = {1575-1584},}
    </citation>
    <citation type="bibtex">
        @article{HMMER2013,
            journal = {GitHub repository},
            author = {8. None},
            title = {HMMER 3.1+ hmmscan search sequence(s) against a profile database},
            year = {2013},
            url = {http://hmmer.org},}
    </citation>
    <citation type="doi">10.1186/1471-2105-10-421</citation>
</citations>
</tool>
|