41
|
1 <tool id="plant_tribes_gene_family_aligner" name="GeneFamilyAligner" version="@WRAPPER_VERSION@.2">
|
16
|
2 <description>aligns integrated orthologous gene family clusters</description>
|
0
|
3 <macros>
|
|
4 <import>macros.xml</import>
|
|
5 </macros>
|
|
6 <expand macro="requirements_gene_family_aligner" />
|
36
|
<command detect_errors="exit_code"><![CDATA[
## Assemble the gene_family_aligner.py command line from the tool form.

## Short aliases for the nested conditional parameters.
#set fmt = $input_format_cond.input_format
#set aln_cond = $input_format_cond.alignment_method_cond
#set aln_method = $aln_cond.alignment_method

## The flag is False for 'ptortho' input and for 'ptorthocs' input with
## codon_alignments == 'no'; it stays True in every other case.
#set want_codon_alignments = True
#if str($fmt) == 'ptortho':
    #set want_codon_alignments = False
#else if str($fmt) == 'ptorthocs' and str($input_format_cond.codon_alignments) == 'no':
    #set want_codon_alignments = False
#end if

python '$__tool_directory__/gene_family_aligner.py'
--alignment_method $aln_method
#if str($aln_method) == 'pasta':
    --pasta_script_path '$__tool_directory__/run_pasta.py'
    --pasta_iter_limit $aln_cond.pasta_iter_limit
#end if
--num_threads \${GALAXY_SLOTS:-4}
#if str($fmt) == 'ptortho':
    --orthogroup_faa '$input_format_cond.input_ptortho.extra_files_path'
#else:
    ## Remaining select option: 'ptorthocs'.
    --orthogroup_faa '$input_format_cond.input_ptorthocs.extra_files_path'
    #if str($input_format_cond.codon_alignments) == 'yes':
        --codon_alignments true
    #end if
#end if

## Post-processing options. Each branch also picks the single output dataset
## that receives the results; it is emitted once after the branches.
#set post_cond = $remove_gappy_sequences_cond
#if str($post_cond.remove_gappy_sequences) == 'yes':
    #set trim_cond = $post_cond.trim_type_cond
    #if str($trim_cond.trim_type) == 'gap_trimming' and str($trim_cond.gap_trimming):
        --gap_trimming $trim_cond.gap_trimming
    #else:
        ## Reached for trim_type 'automated_trimming' (and for an empty gap score).
        --automated_trimming true
    #end if
    #set removal_cond = $post_cond.remove_sequences_with_gaps_cond
    #if str($removal_cond.remove_sequences_with_gaps) == 'yes':
        #if str($removal_cond.remove_sequences_with_gaps_of):
            --remove_sequences $removal_cond.remove_sequences_with_gaps_of
        #end if
        #if str($removal_cond.iterative_realignment):
            --iterative_realignment $removal_cond.iterative_realignment
        #end if
        #if $want_codon_alignments:
            #set chosen_output = $output_aln_filtered_ca
        #else:
            #set chosen_output = $output_aln_filtered
        #end if
    #else:
        #if $want_codon_alignments:
            #set chosen_output = $output_aln_trimmed_ca
        #else:
            #set chosen_output = $output_aln_trimmed
        #end if
    #end if
#else:
    #if $want_codon_alignments:
        #set chosen_output = $output_aln_ca
    #else:
        #set chosen_output = $output_aln
    #end if
#end if
--output '$chosen_output'
--output_dir '$chosen_output.files_path'
#if str($output_dataset_collection) == 'yes':
    --output_dataset_collection dataset_collection
#end if
]]></command>
|
0
|
<inputs>
    <!-- Input type selector: protein-only orthogroups, or proteins plus coding sequences. -->
    <conditional name="input_format_cond">
        <param name="input_format" type="select" label="Classified orthogroup fasta files">
            <option value="ptortho">Proteins orthogroup fasta files</option>
            <option value="ptorthocs">Protein and coding sequences orthogroup fasta files</option>
        </param>
        <when value="ptortho">
            <!-- The command passes this dataset's extra_files_path, so it must not be empty. -->
            <param name="input_ptortho" type="data" format="ptortho" label="Proteins orthogroup fasta files">
                <validator type="empty_extra_files_path" />
            </param>
            <expand macro="cond_alignment_method" />
        </when>
        <when value="ptorthocs">
            <!-- The command passes this dataset's extra_files_path, so it must not be empty. -->
            <param name="input_ptorthocs" type="data" format="ptorthocs" label="Protein and coding sequences orthogroup fasta files">
                <validator type="empty_extra_files_path" />
            </param>
            <expand macro="cond_alignment_method" />
            <!-- Codon alignment choice is offered only for this input type. -->
            <expand macro="param_codon_alignments" />
        </when>
    </conditional>
    <expand macro="cond_remove_gappy_sequences" />
    <!-- Optional extra output: a list collection discovered from the dataset_collection directory. -->
    <param name="output_dataset_collection" type="select" display="radio" label="Output additional dataset collection of files?">
        <option value="no" selected="true">No</option>
        <option value="yes">Yes</option>
    </param>
</inputs>
|
|
109 <outputs>
|
35
|
<data name="output_aln" format="ptalign" label="${tool.name} (proteins orthogroup alignments) on ${on_string}">
    <!-- Protein alignments with no post-processing: ptortho input, or ptorthocs input without codon alignments. -->
    <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no' and (input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no'))</filter>
</data>
|
35
|
<data name="output_aln_ca" format="ptalignca" label="${tool.name} (protein and coding sequences orthogroup alignments) on ${on_string}">
    <!-- Codon alignment output with no post-processing. The command writes to this dataset only
         for ptorthocs input with codon alignments enabled; ptortho input always goes to output_aln,
         so ptortho must not pass this filter (it previously did, leaving an empty dataset). -->
    <filter>input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes' and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter>
</data>
|
35
|
<data name="output_aln_filtered" format="ptalignfiltered" label="${tool.name} (filtered proteins orthogroup alignments) on ${on_string}">
    <!-- Protein alignments after trimming and sequence removal: ptortho input, or ptorthocs input without codon alignments. -->
    <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes' and (input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no'))</filter>
</data>
|
35
|
<data name="output_aln_filtered_ca" format="ptalignfilteredca" label="${tool.name} (filtered protein and coding sequences orthogroup alignments) on ${on_string}">
    <!-- Codon alignment output after trimming and sequence removal. The command writes to this
         dataset only for ptorthocs input with codon alignments enabled; ptortho input always goes
         to output_aln_filtered, so ptortho must not pass this filter (it previously did). -->
    <filter>input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes' and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes')</filter>
</data>
|
35
|
<data name="output_aln_trimmed" format="ptaligntrimmed" label="${tool.name} (trimmed proteins orthogroup alignments) on ${on_string}">
    <!-- Protein alignments after trimming only (no sequence removal): ptortho input, or ptorthocs input without codon alignments. -->
    <filter>remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no' and (input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no'))</filter>
</data>
|
35
|
<data name="output_aln_trimmed_ca" format="ptaligntrimmedca" label="${tool.name} (trimmed protein and coding sequences orthogroup alignments) on ${on_string}">
    <!-- Codon alignment output after trimming only (no sequence removal). The command writes to
         this dataset only for ptorthocs input with codon alignments enabled; ptortho input always
         goes to output_aln_trimmed, so ptortho must not pass this filter (it previously did). -->
    <filter>input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes' and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no')</filter>
</data>
|
38
|
<collection name="dataset_collection" type="list" label="${tool.name} (dataset collection) on ${on_string}">
    <!-- Created only on request; elements are discovered from the dataset_collection directory
         named by the command's output_dataset_collection argument. -->
    <filter>output_dataset_collection == 'yes'</filter>
    <discover_datasets directory="dataset_collection" pattern="__name__" format="fasta" />
</collection>
|
0
|
132 </outputs>
|
|
133 <tests>
|
18
|
134 <!-- Test framework does not currently support inputs whose associated extra_files_path contains files to be analyzed.
|
0
|
135 <test>
|
|
136 </test>
|
|
137 -->
|
|
138 </tests>
|
|
139 <help>
|
|
140 This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
|
16
|
141 analyses of genome-scale gene families and transcriptomes. This tool estimates protein and codon multiple sequence alignments
|
|
142 of integrated orthologous gene family fasta files produced by the GeneFamilyIntegrator tool.
|
0
|
143
|
|
144 -----
|
|
145
|
|
146 **Required options**
|
|
147
|
28
|
148 * **Classified orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyClassifier tool selected from your history. Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences.
|
0
|
149
|
16
|
150 * **Multiple sequence alignment method** - method for estimating orthogroup multiple sequence alignments. PlantTribes estimates alignments using either MAFFT's L-INS-i algorithm or the divide and conquer approach implemented in the PASTA pipeline for large alignments.
|
0
|
151
|
20
|
152 - **PASTA iteration limit** - number of PASTA iterations. By default, PASTA performs 3 iterations.
|
0
|
153
|
23
|
154 * **Codon alignments** - select 'Yes' to create codon multiple sequence alignments. This option requires proteins and their corresponding coding sequences to be provided as input data.
|
|
155
|
0
|
156 **Other options**
|
|
157
|
16
|
158 * **Alignment post-processing configuration** - select 'Yes' to enable multiple sequence alignment post-processing configuration options.
|
0
|
159
|
16
|
160 - **Trimming method** - multiple sequence alignment trimming method. PlantTribes trims alignments using two automated approaches implemented in trimAl. Gap score based trimming removes alignment sites that do not achieve a user specified gap score. For example, a setting of 0.1 removes sites that have gaps in 90% or more of the sequences in the multiple sequence alignment. The automated heuristic trimming approach determines the best automated trimAl method to trim a given alignment as described in the trimAl tutorial `trimAl`_.
|
0
|
161
|
22
|
162 - **Gap score** - the fraction of sequences with gaps allowed in an alignment site. The score is restricted to the range 0.0 - 1.0. Zero value has no effect.
|
16
|
163
|
|
164 - **Remove sequences** - select 'Yes' to remove sequences in multiple sequence alignments that do not achieve a user specified alignment coverage score. For example, a setting of 0.7 removes sequences with more than 30% gaps in the alignment. This option requires one of the trimming methods to be set.
|
|
165
|
22
|
166 - **Coverage score** - minimum fraction of sites without gaps for a sequence in a multiple sequence alignment. The score is restricted to the range 0.0 - 1.0. Zero value has no effect.
|
16
|
167
|
22
|
168 - **Realignment iteration limit** - number of iterations to perform trimming, removal of sequences, and realignment of orthogroup sequences. Zero value has no effect.
|
0
|
169
|
38
|
170 * **Output additional dataset collection of files** - selecting 'Yes' will produce an additional output dataset collection whose elements are copies of the directories of files (these elements can be viewed with visualization tools).
|
|
171
|
18
|
172 .. _trimAl: http://trimal.cgenomics.org
|
|
173
|
0
|
174 </help>
|
|
175 <citations>
|
|
176 <expand macro="citation1" />
|
16
|
<!-- PlantTribes resource paper. Authors use the BibTeX "Last, First and ..." form;
     the leaked reference-list number was removed from the author field. -->
<citation type="bibtex">
    @article{Wall2008,
    journal = {Nucleic Acids Research},
    author = {Wall, PK and Leebens-Mack, J and Muller, KF and Field, D and Altman, NS},
    title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
    year = {2008},
    volume = {36},
    number = {suppl 1},
    pages = {D970--D976},
    }
</citation>
|
|
<!-- MAFFT version 7 paper. Authors use the BibTeX "Last, First and ..." form;
     the leaked reference-list number was removed from the author field. -->
<citation type="bibtex">
    @article{Katoh2013,
    journal = {Molecular biology and evolution},
    author = {Katoh, K and Standley, DM},
    title = {MAFFT multiple sequence alignment software version 7: improvements in performance and usability},
    year = {2013},
    volume = {30},
    number = {4},
    pages = {772--780},
    }
</citation>
|
|
<!-- PASTA paper. Authors use the BibTeX "Last, First and ..." form; the editor note that was
     embedded in the title now lives in the editor field, and the page range uses ASCII dashes. -->
<citation type="bibtex">
    @article{Mirarab2014,
    journal = {Research in Computational Molecular Biology (RECOMB)},
    author = {Mirarab, S and Nguyen, N and Warnow, T},
    editor = {Sharan, R},
    title = {PASTA: Ultra-Large Multiple Sequence Alignment},
    year = {2014},
    pages = {177--191},
    url = {https://github.com/smirarab/pasta},
    }
</citation>
|
|
<!-- trimAl paper. Authors use the BibTeX "Last, First and ..." form; the leaked reference-list
     number and the stray comma inside the journal name were removed. -->
<citation type="bibtex">
    @article{Capella-Gutierrez2009,
    journal = {Bioinformatics},
    author = {Capella-Gutierrez, S and Silla-Martínez, JM and Gabaldón, T},
    title = {trimAl: a tool for automated alignment trimming in large-scale phylogenetic analyses},
    year = {2009},
    volume = {25},
    number = {15},
    pages = {1972--1973},
    }
</citation>
|
40
|
<!-- MSAViewer paper. Authors use the BibTeX "Last, First and ..." form; the leaked reference-list
     number and the stray comma inside the journal name were removed. -->
<citation type="bibtex">
    @article{Yachdav2016,
    journal = {Bioinformatics},
    author = {Yachdav, G and Wilzbach, S and Rauscher, B and Sheridan, R and Sillitoe, I and Procter, J and Lewis, SE and Rost, B and Goldberg, T},
    title = {MSAViewer: interactive JavaScript visualization of multiple sequence alignments},
    year = {2016},
    volume = {32},
    number = {22},
    pages = {3501--3503},
    }
</citation>
|
|
<!-- Jalview paper. Authors use the BibTeX "Last, First and ..." form; the leaked reference-list
     number and the stray comma inside the journal name were removed. -->
<citation type="bibtex">
    @article{Clamp2004,
    journal = {Bioinformatics},
    author = {Clamp, M and Cuff, J and Searle, SM and Barton, GJ},
    title = {The jalview java alignment editor},
    year = {2004},
    volume = {20},
    number = {3},
    pages = {426--427},
    }
</citation>
|
0
|
236 </citations>
|
|
237 </tool>
|