|
9
|
1 <tool id="phyml-sms" name="PhyML-SMS" version="1.3.1">
|
|
|
2 <description>Maximum likelihood-based inference of phylogenetic trees with Smart Model Selection</description>
|
|
|
<!-- Software packages that must be available for the command to run. -->
<requirements>
    <requirement version="1.3" type="package">phyml-sms</requirement>
</requirements>
|
|
|
<command><![CDATA[
    ## Run SMS on the alignment; with -t, SMS also infers a tree with PhyML
    ## using the selected model. Outputs are written to the job working
    ## directory (-o .) and moved onto the Galaxy datasets afterwards.
    ## Dataset paths are single-quoted so the shell never word-splits them,
    ## and the steps are chained with "&&" so a failing sms.sh is not masked
    ## by the exit status of the following mv commands.
    sms.sh
        -i '$input'
        -o .
        -p '$output_models'
        -t

    #if $sequence.seqtype == "cfg"
        ## Automatic sequence detection:
        ## read an info file to choose which data type option to set.
        ## NOTE(review): when the info file mentions neither 'dna' nor
        ## 'protein', no -d option is emitted although the help text lists
        ## -d as mandatory; confirm whether that case can occur upstream.
        #set $info = open(str($sequence.input_info)).read()

        #if 'dna' in $info
            -d nt
        #else if 'protein' in $info
            -d aa
        #end if
    #else
        -d $sequence.seqtype
    #end if

    -c $stat_crit

    ## Optional user-supplied starting tree.
    #if $inpuTree.inputtree == "true"
        -u '$inpuTree.userInpuTree'
    #end if
    -s $move

    ## Branch support: negative codes select the approximate tests,
    ## 0 disables support, a positive value is a bootstrap replicate count.
    #if $support_condition.support == "sh"
        -b -4
    #else if $support_condition.support == "aBayes"
        -b -5
    #else if $support_condition.support == "no"
        -b 0
    #else if $support_condition.support == "boot"
        -b $support_condition.boot_number
    #end if

    ## Random starting trees are only passed along for SPR searches.
    #if $randstart.value != 0 and $move.value == "SPR"
        -r $randstart
    #end if
    > '$output_stdout'
    &&
    mv *_phyml_tree.txt '$output_tree'
    &&
    mv *_phyml_stats.txt '$output_stats'
]]>
</command>
|
|
|
<inputs>
    <!-- Alignment to analyse. -->
    <param format="phylip" name="input" type="data" label="Alignment file" help="phylip format"/>

    <!-- Data type: chosen explicitly, or read from an info file ("cfg"). -->
    <conditional name="sequence">
        <param name="seqtype" type="select" label="Data type">
            <option value="nt">Nucleic acids</option>
            <option value="aa">Amino acids</option>
            <option value="cfg">Auto</option>
        </param>
        <when value="nt"/>
        <when value="aa"/>
        <when value="cfg">
            <param name="input_info" type="data" format="txt" label="info" help="Precomputed file containing sequence description (dna or protein)"/>
        </when>
    </conditional>

    <!-- Criterion used by SMS to rank the candidate models. -->
    <param name="stat_crit" type="select" label="Statistical criterion to select the model">
        <option value="aic">AIC</option>
        <option value="bic">BIC</option>
    </param>

    <!-- Topology search strategy handed to PhyML. -->
    <param name="move" type="select" label="Tree topology search" display="radio">
        <option value="NNI">NNI (Nearest Neighbor Interchange)</option>
        <option value="SPR">SPR (Subtree Pruning and Regrafting)</option>
    </param>

    <!-- Branch support method; only bootstrap needs an extra parameter. -->
    <conditional name="support_condition">
        <param type="select" name="support" label="Branch support" help="Use aLRT or aBayes to save computing time">
            <option value="sh">SH-like aLRT</option>
            <option value="aBayes">aBayes</option>
            <option value="boot">Bootstrap</option>
            <option value="no">No branch support</option>
        </param>
        <when value="sh"/>
        <when value="aBayes"/>
        <when value="boot">
            <param type="integer" name="boot_number" min="1" value="100" label="Number of bootstrap replicates" help="Must be a positive integer"/>
        </when>
        <when value="no"/>
    </conditional>

    <!-- Optional user-supplied starting tree. -->
    <conditional name="inpuTree">
        <param name="inputtree" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Use input starting tree"/>
        <when value="true">
            <param name="userInpuTree" type="data" label="Tree file" help="newick format"/>
        </when>
        <when value="false"/>
    </conditional>

    <!-- The command only forwards this value when it is non-zero and the SPR search is selected. -->
    <param type="integer" name="randstart" value="0" min="0" max="10" label="Number of random starting trees"/>
</inputs>
|
|
|
<outputs>
    <!-- Tree and statistics files are moved here from the working directory by the command. -->
    <data name="output_tree" format="nhx" label="PhyML Newick tree"/>
    <data name="output_stats" format="txt" label="PhyML Statistics"/>
    <!-- Captured standard output of sms.sh. -->
    <data name="output_stdout" format="txt" label="SMS Best Model"/>
    <!-- File passed to sms.sh through its -p option. -->
    <data name="output_models" format="txt" label="SMS compare models"/>
</outputs>
|
|
|
<tests>
    <!-- Nucleotide run on the "phylip" test file; the model comparison output is compared with sms.csv. -->
    <test>
        <param value="phylip" name="input"/>
        <param value="nt" name="seqtype"/>
        <output file="sms.csv" name="output_models"/>
    </test>
</tests>
|
|
|
113 <help><![CDATA[
|
|
|
114
|
|
|
115 .. class:: infomark
|
|
|
116
|
|
|
117 This script runs SMS to select the substitution model which best fits the input data.
|
|
|
118 It may also run PhyML with the selected model.
|
|
18
|
119
|
|
|
120 SMS options ::
|
|
|
121
|
|
9
|
122 -h = Help
|
|
|
123 -i = **Mandatory** Input alignment file in PHYLIP format
|
|
|
124 -d = **Mandatory** Data type : 'aa' or 'nt'
|
|
|
125 -o = Output directory
|
|
|
126 -p = Output CSV filename
|
|
|
127 -c = Statistical criterion to select the model : 'AIC' (default) or 'BIC'
|
|
|
128 -u = Input starting tree (Newick format)
|
|
|
129 -t = Use this option if you want SMS to infer a tree with PhyML using the SMS selected model
|
|
18
|
130
|
|
|
131 PhyML options ::
|
|
|
132
|
|
9
|
133 -s = Type of tree improvement : 'NNI (default)' or 'SPR'
|
|
|
134 -r = Number of random starting trees : 0 (default)
|
|
|
135 -b = Branch Support : >0 for bootstraps, -4 for aLRT, -5 for aBayes, 0 (default)
|
|
|
136
|
|
18
|
137
|
|
9
|
138 **PhyML 20120412**
|
|
|
139
|
|
|
140 -----
|
|
|
141
|
|
|
142
|
|
|
143 ===========
|
|
|
144 Overview:
|
|
|
145 ===========
|
|
|
146
|
|
|
147 PhyML is a phylogeny software based on the maximum-likelihood principle. Early PhyML versions used a fast algorithm to perform Nearest Neighbor Interchanges (NNIs), in order to improve a reasonable starting tree topology. Since the original publication (Guindon and Gascuel 2003), PhyML has been widely used due to its simplicity and a fair accuracy/speed compromise. In the meantime, research around PhyML has continued.
|
|
|
148
|
|
|
149 We designed an efficient algorithm to search the tree space using Subtree Pruning and Regrafting (SPR) topological moves (Hordijk and Gascuel 2005), and proposed a fast branch test based on an approximate likelihood ratio test (Anisimova and Gascuel 2006). However, these novelties were not included in the official version of PhyML, and we found that improvements were still needed in order to make them effective in some practical cases. PhyML 3.0 achieves this task.
|
|
|
150
|
|
|
151 It implements new algorithms to search the space of tree topologies with user-defined intensity. A non-parametric, Shimodaira-Hasegawa-like branch test is also available. The program provides a number of new evolutionary models and its interface was entirely re-designed. We tested PhyML 3.0 on a large collection of real data sets to ensure that the new version is stable, ready-to-use and still reasonably fast and accurate.
|
|
|
152
|
|
|
153 -----
|
|
|
154
|
|
|
155 For further information, please visit the PhyML_ and SMS_ websites.
|
|
|
156
|
|
|
157
|
|
|
158 .. _PhyML: http://www.atgc-montpellier.fr/phyml/
|
|
|
159 .. _SMS: http://www.atgc-montpellier.fr/phyml-sms/
|
|
|
160
|
|
|
161
|
|
|
162 Models References:
|
|
|
163
|
|
|
164 - Dayhoff : Dayhoff, M., Schwartz, R. & Orcutt, B.
|
|
|
165 A model of evolutionary change in proteins.
|
|
|
166 In Dayhoff, M. (ed.) Atlas of Protein Sequence and Structure, vol. 5, 345–352 (National Biomedical Research Foundation, Washington, D. C., 1978)
|
|
|
167
|
|
|
168 - JTT : Jones, D., Taylor, W. & Thornton, J.
|
|
|
169 The rapid generation of mutation data matrices from protein sequences.
|
|
|
170 Computer Applications in the Biosciences (CABIOS) 8, 275–282 (1992).
|
|
|
171
|
|
|
172 - Blosum62 : Henikoff, S. & Henikoff, J.
|
|
|
173 Amino acid substitution matrices from protein blocks.
|
|
|
174 Proceedings of the National Academy of Sciences of the United States of America (PNAS) 89, 10915–10919 (1992).
|
|
|
175
|
|
|
176 - MtREV : Adachi, J. & Hasegawa, M.
|
|
|
177 MOLPHY version 2.3. programs for molecular phylogenetics based on maximum likelihood.
|
|
|
178 In Ishiguro, M. et al. (eds.) Computer Science Monographs, 28 (The Institute of Statistical Mathematics, Tokyo, 1996).
|
|
|
179
|
|
|
180 - MtMam : Cao, Y., A. Janke , P. J. Waddell, M. Westerman, O. Takenaka, S. Murata, N. Okada, S. Paabo, and M. Hasegawa
|
|
|
181 Conflict among individual mitochondrial proteins in resolving the phylogeny of eutherian orders.
|
|
|
182 Journal of Molecular Evolution 47, 307–322 (1998).
|
|
|
183
|
|
|
184 - CpREV : Adachi, J., Waddell, P., Martin, W. & Hasegawa, M.
|
|
|
185 Plastid genome phylogeny and a model of amino acid substitution for proteins encoded by chloroplast DNA.
|
|
|
186 Journal of Molecular Evolution 50, 348–358 (2000).
|
|
|
187
|
|
|
188 - VT : Muller, T. & Vingron, M.
|
|
|
189 Modeling amino acid replacement.
|
|
|
190 Journal of Computational Biology 7, 761–776 (2000).
|
|
|
191
|
|
|
192 - WAG : Whelan, S. & Goldman, N.
|
|
|
193 A general empirical model of protein evolution derived from multiple protein families using a maximum-likelihood approach.
|
|
|
194 Molecular Biology and Evolution 18, 691–699 (2001).
|
|
|
195
|
|
|
196 - RtREV : Dimmic, M., Rest, J., Mindell, D. & Goldstein, R.
|
|
|
197 rtREV : an amino acid substitution matrix for inference of retrovirus and reverse transcriptase phylogeny.
|
|
|
198 Journal of Molecular Evolution 55, 65–73 (2002).
|
|
|
199
|
|
|
200 - DCMut : Kosiol, C. & Goldman, N.
|
|
|
201 Different versions of the Dayhoff rate matrix.
|
|
|
202 Molecular Biology and Evolution 22, 193–199 (2004).
|
|
|
203
|
|
|
204 - MtArt : Abascal F, Posada D, Zardoya R.
|
|
|
205 MtArt: a new model of amino acid replacement for Arthropoda.
|
|
|
206 Mol Biol Evol. 2007 Jan;24(1):1-5. Epub 2006 Oct 16.
|
|
|
207
|
|
|
208 - HIVb - HIVw : Nickle DC, Heath L, Jensen MA, Gilbert PB, Mullins JI, Kosakovsky Pond SL.
|
|
|
209 HIV-Specific Probabilistic Models of Protein Evolution.
|
|
|
210 PLoS ONE. 2007 Jun 6;2:e503.
|
|
|
211
|
|
|
212 - LG : Le, S. & Gascuel, O.
|
|
|
213 An improved general amino-acid replacement matrix.
|
|
|
214 Mol. Biol. Evol. 25, 1307–1320 (2008)
|
|
|
215
|
|
|
216 - MtZOA : Rota-Stabelli O, Yang Z, Telford MJ
|
|
|
217 MtZoa: A general mitochondrial amino acid substitutions model for animal evolutionary studies
|
|
|
218 Molecular Phylogenetics and Evolution 52 (2009) 268–272
|
|
|
219
|
|
|
220 - FLU : Cuong Cao Dang, Quang Si Le, Olivier Gascuel and Vinh Sy Le
|
|
|
221 FLU, an amino acid substitution model for influenza proteins.
|
|
|
222 BMC Evolutionary Biology 2010 Apr 12;10:99
|
|
|
223
|
|
|
224 - AB :
|
|
|
225 Alexander Mirsky, Linda Kazandjian and Maria Anisimova
|
|
|
226 Antibody-specific Model of Amino Acid Substitution for Immunological Inferences from Alignments of Antibody Sequences
|
|
|
227 Mol Biol Evol (2014) doi: 10.1093/molbev/msu340
|
|
|
228 ]]>
|
|
|
229 </help>
|
|
|
<!-- DOIs of the publications listed as citations for this tool. -->
<citations>
    <citation type="doi">10.1093/sysbio/syq010</citation>
    <citation type="doi">10.1093/nar/gki352</citation>
    <citation type="doi">10.1080/10635150390235520</citation>
</citations>
|
|
|
235 </tool>
|