comparison hyphy_meme.xml @ 35:ff2724696a7d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92
author iuc
date Tue, 07 Oct 2025 20:39:19 +0000
parents 07d7b41c670b
children
comparison
equal deleted inserted replaced
34:07d7b41c670b 35:ff2724696a7d
12 @INPUT_TREE@ 12 @INPUT_TREE@
13 --code '$gencodeid' 13 --code '$gencodeid'
14 @branch_options@ 14 @branch_options@
15 --pvalue '$p_value' 15 --pvalue '$p_value'
16 --resample $resample 16 --resample $resample
17 --rates $rates
18 --multiple-hits $multiple_hits_conditional.multiple_hits
19 #if $multiple_hits_conditional.multiple_hits != "None":
20 --site-multihit $multiple_hits_conditional.site_multihit
21 #end if
22 --impute-states '$impute_states'
23 --precision $precision
24 --kill-zero-lengths $kill_zero_lengths
25 #if $restrict_sites_conditional.restrict_sites_flag == "true":
26 --limit-to-sites '$restrict_sites_conditional.limit_to_sites'
27 --save-lf-for-sites '$restrict_sites_conditional.save_lf_for_sites'
28 #end if
17 --output '$meme_output' 29 --output '$meme_output'
18 --full-model '$full_model' 30 --full-model '$full_model'
31 > meme_stdout.md
19 @ERRORS@ 32 @ERRORS@
20 ]]></command> 33 ]]></command>
21 <inputs> 34 <inputs>
22 <expand macro="inputs"/> 35 <expand macro="inputs"/>
23 <expand macro="gencode"/> 36 <expand macro="gencode"/>
24 <expand macro="branches"/> 37 <expand macro="branches"/>
25 <param argument="--pvalue" name="p_value" type="float" value=".1" min="0" max="1" label="P-value threshold"/> 38 <param argument="--pvalue" name="p_value" type="float" value=".1" min="0" max="1" label="P-value threshold"/>
26 <expand macro="resample"/> 39 <section name="advanced_options" title="Advanced Options" expanded="false">
27 <param argument="--full-model" type="boolean" truevalue="Yes" falsevalue="No" label="Perform branch length re-optimization under the full codon model" /> 40 <expand macro="resample"/>
41 <param argument="--rates" type="integer" value="2" min="2" max="4" label="Number of omega rate classes"/>
42 <conditional name="multiple_hits_conditional">
43 <param argument="--multiple-hits" type="select" label="Include support for multiple nucleotide substitutions">
44 <option value="Double">Include branch-specific rates for double nucleotide substitutions</option>
45 <option value="Double+Triple">Include branch-specific rates for double and triple nucleotide substitutions</option>
46 <option value="None" selected="true">Use standard models which permit only single nucleotide changes to occur instantly</option>
47 </param>
48 <when value="Double">
49 <param argument="--site-multihit" type="select" label="Estimate multiple hit rates for each site">
50 <option value="Estimate" selected="true">Estimate</option>
51 <option value="No">No</option>
52 </param>
53 </when>
54 <when value="Double+Triple">
55 <param argument="--site-multihit" type="select" label="Estimate multiple hit rates for each site">
56 <option value="Estimate" selected="true">Estimate</option>
57 <option value="No">No</option>
58 </param>
59 </when>
60 <when value="None">
61 </when>
62 </conditional>
63 <param argument="--impute-states" type="boolean" truevalue="Yes" falsevalue="No" label="Impute likely character states for each sequence"/>
64 <param argument="--precision" type="select" label="Optimization precision for preliminary fits">
65 <option value="standard">Standard</option>
66 <option value="reduced">Reduced for faster fitting</option>
67 </param>
68 <expand macro="kill_zero_lengths_param"/>
69 <conditional name="restrict_sites_conditional">
70 <param name="restrict_sites_flag" type="select" label="Restrict MEME analysis to a subset of sites" help="If Yes, allows specifying a subset of sites for analysis.">
71 <option value="true">Yes</option>
72 <option value="false" selected="true">No</option>
73 </param>
74 <when value="true">
75 <param argument="--limit-to-sites" type="text" optional="true" label="Only analyze sites whose 1-based indices match the following list (null to skip)" help="Comma-separated list of site indices."/>
76 <param argument="--save-lf-for-sites" type="text" optional="true" label="For sites whose 1-based indices match the following list, write out likelihood function snapshots (empty string to skip)" help="Comma-separated list of site indices."/>
77 </when>
78 <when value="false">
79 </when>
80 </conditional>
81 <param argument="--full-model" type="boolean" truevalue="Yes" falsevalue="No" checked="true" label="Perform branch length re-optimization under the full codon model" />
82 </section>
28 </inputs> 83 </inputs>
29 <outputs> 84 <outputs>
30 <data name="meme_output" format="hyphy_results.json" /> 85 <data name="meme_output" format="hyphy_results.json" />
86 <data name="meme_md_report" format="markdown" from_work_dir="meme_stdout.md" label="MEME Report (Markdown) for ${tool.name} on ${on_string}" />
31 </outputs> 87 </outputs>
32 <tests> 88 <tests>
33 <test> 89 <test expect_num_outputs="2">
34 <param name="input_file" ftype="fasta" value="meme-in1.fa"/> 90 <param name="input_file" ftype="fasta" value="meme-in1.fa"/>
35 <param name="input_nhx" ftype="nhx" value="meme-in1.nhx"/> 91 <param name="input_nhx" ftype="nhx" value="meme-in1.nhx"/>
36 <conditional name="branch_cond"> 92 <conditional name="branch_cond">
37 <param name="branch_sel" value="All"/> 93 <param name="branch_sel" value="All"/>
38 </conditional> 94 </conditional>
39 <param name="p_value" value="0.1"/> 95 <param name="p_value" value="0.1"/>
40 <output name="meme_output" file="meme-out1.json" compare="sim_size" delta="25000"/> 96 <output name="meme_output">
97 <assert_contents>
98 <has_text text="fits"/>
99 <has_text text="branch attributes"/>
100 <has_text text="Global MG94xREV"/>
101 </assert_contents>
102 </output>
103 <output name="meme_md_report">
104 <assert_contents>
105 <has_text text="### For partition 1 these sites are significant at p &lt;=0.1"/>
106 </assert_contents>
107 </output>
108 </test>
109 <test expect_num_outputs="2">
110 <param name="input_file" ftype="fasta" value="meme-in1.fa"/>
111 <param name="input_nhx" ftype="nhx" value="meme-in1.nhx"/>
112 <conditional name="branch_cond">
113 <param name="branch_sel" value="Internal"/>
114 </conditional>
115 <section name="advanced_options">
116 <param name="rates" value="3"/>
117 </section>
118 <output name="meme_output">
119 <assert_contents>
120 <has_text text="fits"/>
121 <has_text text="branch attributes"/>
122 <has_text text="Mixture distribution weight allocated to negative/neutral evolution component 2"/>
123 </assert_contents>
124 </output>
125 <output name="meme_md_report">
126 <assert_contents>
127 <has_text text=">rates => 3"/>
128 <has_text text="non-syn rate (beta) distribution, rates : weights"/>
129 </assert_contents>
130 </output>
131 </test>
132 <test expect_num_outputs="2">
133 <param name="input_file" ftype="fasta" value="meme-in1.fa"/>
134 <param name="input_nhx" ftype="nhx" value="meme-in1.nhx"/>
135 <section name="advanced_options">
136 <param name="impute_states" value="Yes"/>
137 <conditional name="multiple_hits_conditional">
138 <param name="multiple_hits" value="Double"/>
139 </conditional>
140 </section>
141
142 <output name="meme_output">
143 <assert_contents>
144 <has_text text="fits"/>
145 <has_text text="Imputed States"/>
146 <has_text text="Relative rate estimate for 2-nucleotide substitutions"/>
147 </assert_contents>
148 </output>
149 <output name="meme_md_report">
150 <assert_contents>
151 <has_text text="rate at which 2 nucleotides are changed instantly within a single codon"/>
152 </assert_contents>
153 </output>
41 </test> 154 </test>
42 </tests> 155 </tests>
43 <help><![CDATA[ 156 <help><![CDATA[
44 MEME: Mixed Effects Model of Evolution 157 MEME: Mixed Effects Model of Evolution
45 ====================================== 158 ======================================
46 159
47 What question does this method answer? 160 **What question does this method answer?**
48 --------------------------------------
49 161
50 Which site(s) in a gene are subject to pervasive or *episodic*, i.e. only on a 162 Which site(s) in a gene are subject to pervasive or *episodic*, i.e. only on a
51 single lineage or subset of lineages, diversifying selection? 163 single lineage or subset of lineages, diversifying selection?
52 164
53 Recommended Applications 165 **Recommended Applications**
54 ------------------------
55 166
56 The phenomenon of pervasive selection is generally most prevalent in pathogen evolution and any biological system influenced by evolutionary arms race dynamics 167 The phenomenon of pervasive selection is generally most prevalent in pathogen evolution and any biological system influenced by evolutionary arms race dynamics
57 (or balancing selection), including adaptive immune escape by viruses. 168 (or balancing selection), including adaptive immune escape by viruses.
58 MEME is ideally suited to identify sites under positive selection which 169 MEME is ideally suited to identify sites under positive selection which
59 represent candidate sites subject to strong selective pressures across the entire phylogeny or only on parts of the phylogeny. 170 represent candidate sites subject to strong selective pressures across the entire phylogeny or only on parts of the phylogeny.
60 171
61 MEME is the sole method in HyPhy for detecting selection at individual sites that considers both pervasive and episodic selection. 172 MEME is the sole method in HyPhy for detecting selection at individual sites that considers both pervasive and episodic selection.
62 MEME is therefore our recommended method if maximum power is desired. 173 MEME is therefore our recommended method if maximum power is desired.
63 174
64 175 **Methodology**
65 Brief description 176
66 ----------------- 177 MEME (Mixed Effects Model of Evolution) is a powerful statistical method for detecting sites in a coding alignment that have been subject to positive selection. It extends classical fixed-effects likelihood (FEL) models by allowing the non-synonymous substitution rate (dN) to vary from branch to branch at a given site. This "mixed-effects" approach provides increased power to detect episodic selection, where a site may be under positive selection in some lineages but under neutral or purifying selection in others.
67 178
68 MEME (Mixed Effects Model of Evolution) estimates a site-wise synonymous 179 **The Intuition**
69 (alpha) and a two-category mixture of non-synonymous (beta-, with 180
70 proportion p-, and beta+ with proportion [1-p-]) rates, and uses a 181 Imagine you are studying the evolution of a gene across a group of species. Some sites in that gene might be under constant pressure to change (pervasive selection), while others might only experience this pressure for a short period of time in a specific lineage (episodic selection). For example, a virus might evolve a new protein to escape the host's immune system, but once the host population adapts, the pressure on that protein might disappear.
71 likelihood ratio test to determine if beta+ > &alpha; at a site. The 182
72 estimates aggregate information over a proportion of branches at a site, 183 Standard methods that assume a single dN/dS rate across the entire phylogeny might miss this kind of episodic selection. MEME addresses this by modeling the dN/dS ratio at each site as a mixture of two or more rate classes. For each site, MEME infers the probability that it evolves under each rate class on a given branch. This allows the model to identify sites that show evidence of positive selection (dN/dS > 1) even if that selection is confined to a small number of lineages.
73 so the signal is derived from episodic diversification, which is a 184
74 combination of strength of selection [effect size] and the proportion of 185 **The Test**
75 the tree affected. A subset of branches can be selected for testing as 186
76 well, in which case an additional (nuisance) parameter will be inferred 187 For each site, MEME fits a baseline model where dN/dS is constrained to be less than or equal to 1, and an alternative model where a proportion of branches are allowed to have a dN/dS ratio greater than 1. A likelihood ratio test (LRT) is then used to determine if the alternative model provides a significantly better fit to the data. A significant p-value indicates that the site has experienced episodic diversifying selection.
77 -- the non-synonymous rate on branches NOT selected for testing. 188
78 189 **Input**
79 Input
80 -----
81 190
82 1. A *FASTA* sequence alignment. 191 1. A *FASTA* sequence alignment.
83 2. A phylogenetic tree in the *Newick* format 192 2. A phylogenetic tree in the *Newick* format
84 193
85 Note: the names of sequences in the alignment must match the names of the sequences in the tree. 194 Note: the names of sequences in the alignment must match the names of the sequences in the tree.
86 195
87 196
88 Output 197 **Output**
89 ------
90 198
91 A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf). 199 A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf).
92 200
93 A custom visualization module for viewing these results is available (see http://vision.hyphy.org/MEME for an example) 201 A custom visualization module for viewing these results is available (see http://vision.hyphy.org/MEME for an example)
94 202
95 Further reading 203 **Further reading**
96 ---------------
97 204
98 http://hyphy.org/methods/selection-methods/#MEME 205 http://hyphy.org/methods/selection-methods/#MEME
99 206
100 207
101 Tool options 208 **Tool options**
102 ------------
103 :: 209 ::
104 210
105 211 --alignment [required] An in-frame codon alignment in one of the formats supported by HyPhy.
106 --code Which genetic code to use 212 --tree [conditionally required] A phylogenetic tree (optionally annotated with {}).
213
214 --code Which genetic code to use (see tool form for available options).
107 215
108 --branches Which branches should be tested for selection? 216 --branches Which branches should be tested for selection?
109 All [default] : test all branches 217 All [default] : test all branches.
110 218 Internal : test only internal branches (suitable for intra-host pathogen evolution for example, where terminal branches may contain polymorphism data).
111 Internal : test only internal branches (suitable for 219 Leaves: test only terminal (leaf) branches.
112 intra-host pathogen evolution for example, where terminal branches 220 Unlabeled: if the Newick string is labeled using the {} notation, test only branches without explicit labels (see http://hyphy.org/tutorials/phylotree/).
113 may contain polymorphism data) 221 Custom : Enter a branch label.
114 222
115 Leaves: test only terminal (leaf) branches 223 --pvalue The p-value threshold used to determine significance (default: 0.1, range: 0 to 1).
116 224
117 Unlabeled: if the Newick string is labeled using the {} notation, 225 --resample Perform parametric bootstrap resampling to derive site-level null LRT distributions.
118 test only branches without explicit labels 226 Warning: This will result in a significantly slower analysis. A value of 0 means no resampling is performed. This parameter specifies the maximum number of replicates per site (default: 0, range: 0 to 1000).
119 (see http://hyphy.org/tutorials/phylotree/) 227
120 228 --rates The number of omega rate classes to include in the model (default: 2, range: 2 to 4).
121 --pvalue The significance level used to determine significance 229
122 230 --multiple-hits Include support for multiple nucleotide substitutions.
231 Double : Include branch-specific rates for double nucleotide substitutions.
232 Double+Triple : Include branch-specific rates for double and triple nucleotide substitutions.
233 None [default] : Use standard models which permit only single nucleotide changes to occur instantly.
234
235 --site-multihit Estimate multiple hit rates for each site. This option is available only if 'Include support for multiple nucleotide substitutions' is set to 'Double' or 'Double+Triple'.
236 Estimate [default] : Estimate multiple hit rates.
237 No : Do not estimate multiple hit rates.
238
239 --impute-states Use site-level model fits to impute likely character states for each sequence (default: No).
240
241 --precision Optimization precision settings for preliminary fits.
242 Standard [default]
243 Reduced for faster fitting
244
245 --kill-zero-lengths Automatically delete internal zero-length branches for computational efficiency.
246 Yes [default] : Automatically delete internal zero-length branches for computational efficiency (will not affect results otherwise).
247 Constrain : Keep zero-length branches, but constrain their values to 0.
248 No : Keep all branches.
249
250 --restrict-sites Restrict MEME analysis to a subset of sites. If Yes, allows specifying a subset of sites for analysis.
251 Yes : Restrict analysis to a subset of sites.
252 No [default] : Do not restrict analysis to a subset of sites.
253
254 --limit-to-sites Only analyze sites whose 1-based indices match the following list (null to skip). This option is available only if 'Restrict MEME analysis to a subset of sites' is set to 'Yes'. Comma-separated list of site indices.
255 --save-lf-for-sites For sites whose 1-based indices match the following list, write out likelihood function snapshots (empty string to skip). This option is available only if 'Restrict MEME analysis to a subset of sites' is set to 'Yes'. Comma-separated list of site indices.
256
257 --full-model Perform branch length re-optimization under the full codon model (default: Yes).
123 258
124 ]]></help> 259 ]]></help>
125 <expand macro="citations"> 260 <expand macro="citations">
126 <citation type="doi">10.1371/journal.pgen.1002764</citation> 261 <citation type="doi">10.1371/journal.pgen.1002764</citation>
127 </expand> 262 </expand>