Mercurial > repos > iuc > hyphy_meme
comparison hyphy_meme.xml @ 35:ff2724696a7d draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92
| author | iuc |
|---|---|
| date | Tue, 07 Oct 2025 20:39:19 +0000 |
| parents | 07d7b41c670b |
| children |
comparison
equal
deleted
inserted
replaced
| 34:07d7b41c670b | 35:ff2724696a7d |
|---|---|
| 12 @INPUT_TREE@ | 12 @INPUT_TREE@ |
| 13 --code '$gencodeid' | 13 --code '$gencodeid' |
| 14 @branch_options@ | 14 @branch_options@ |
| 15 --pvalue '$p_value' | 15 --pvalue '$p_value' |
| 16 --resample $resample | 16 --resample $resample |
| 17 --rates $rates | |
| 18 --multiple-hits $multiple_hits_conditional.multiple_hits | |
| 19 #if $multiple_hits_conditional.multiple_hits != "None": | |
| 20 --site-multihit $multiple_hits_conditional.site_multihit | |
| 21 #end if | |
| 22 --impute-states '$impute_states' | |
| 23 --precision $precision | |
| 24 --kill-zero-lengths $kill_zero_lengths | |
| 25 #if $restrict_sites_conditional.restrict_sites_flag == "true": | |
| 26 --limit-to-sites '$restrict_sites_conditional.limit_to_sites' | |
| 27 --save-lf-for-sites '$restrict_sites_conditional.save_lf_for_sites' | |
| 28 #end if | |
| 17 --output '$meme_output' | 29 --output '$meme_output' |
| 18 --full-model '$full_model' | 30 --full-model '$full_model' |
| 31 > meme_stdout.md | |
| 19 @ERRORS@ | 32 @ERRORS@ |
| 20 ]]></command> | 33 ]]></command> |
| 21 <inputs> | 34 <inputs> |
| 22 <expand macro="inputs"/> | 35 <expand macro="inputs"/> |
| 23 <expand macro="gencode"/> | 36 <expand macro="gencode"/> |
| 24 <expand macro="branches"/> | 37 <expand macro="branches"/> |
| 25 <param argument="--pvalue" name="p_value" type="float" value=".1" min="0" max="1" label="P-value threshold"/> | 38 <param argument="--pvalue" name="p_value" type="float" value=".1" min="0" max="1" label="P-value threshold"/> |
| 26 <expand macro="resample"/> | 39 <section name="advanced_options" title="Advanced Options" expanded="false"> |
| 27 <param argument="--full-model" type="boolean" truevalue="Yes" falsevalue="No" label="Perform branch length re-optimization under the full codon model" /> | 40 <expand macro="resample"/> |
| 41 <param argument="--rates" type="integer" value="2" min="2" max="4" label="Number of omega rate classes"/> | |
| 42 <conditional name="multiple_hits_conditional"> | |
| 43 <param argument="--multiple-hits" type="select" label="Include support for multiple nucleotide substitutions"> | |
| 44 <option value="Double">Include branch-specific rates for double nucleotide substitutions</option> | |
| 45 <option value="Double+Triple">Include branch-specific rates for double and triple nucleotide substitutions</option> | |
| 46 <option value="None" selected="true">Use standard models which permit only single nucleotide changes to occur instantly</option> | |
| 47 </param> | |
| 48 <when value="Double"> | |
| 49 <param argument="--site-multihit" type="select" label="Estimate multiple hit rates for each site"> | |
| 50 <option value="Estimate" selected="true">Estimate</option> | |
| 51 <option value="No">No</option> | |
| 52 </param> | |
| 53 </when> | |
| 54 <when value="Double+Triple"> | |
| 55 <param argument="--site-multihit" type="select" label="Estimate multiple hit rates for each site"> | |
| 56 <option value="Estimate" selected="true">Estimate</option> | |
| 57 <option value="No">No</option> | |
| 58 </param> | |
| 59 </when> | |
| 60 <when value="None"> | |
| 61 </when> | |
| 62 </conditional> | |
| 63 <param argument="--impute-states" type="boolean" truevalue="Yes" falsevalue="No" label="Impute likely character states for each sequence"/> | |
| 64 <param argument="--precision" type="select" label="Optimization precision for preliminary fits"> | |
| 65 <option value="standard">Standard</option> | |
| 66 <option value="reduced">Reduced for faster fitting</option> | |
| 67 </param> | |
| 68 <expand macro="kill_zero_lengths_param"/> | |
| 69 <conditional name="restrict_sites_conditional"> | |
| 70 <param name="restrict_sites_flag" type="select" label="Restrict MEME analysis to a subset of sites" help="If Yes, allows specifying a subset of sites for analysis."> | |
| 71 <option value="true">Yes</option> | |
| 72 <option value="false" selected="true">No</option> | |
| 73 </param> | |
| 74 <when value="true"> | |
| 75 <param argument="--limit-to-sites" type="text" optional="true" label="Only analyze sites whose 1-based indices match the following list (null to skip)" help="Comma-separated list of site indices."/> | |
| 76 <param argument="--save-lf-for-sites" type="text" optional="true" label="For sites whose 1-based indices match the following list, write out likelihood function snapshots (empty string to skip)" help="Comma-separated list of site indices."/> | |
| 77 </when> | |
| 78 <when value="false"> | |
| 79 </when> | |
| 80 </conditional> | |
| 81 <param argument="--full-model" type="boolean" truevalue="Yes" falsevalue="No" checked="true" label="Perform branch length re-optimization under the full codon model" /> | |
| 82 </section> | |
| 28 </inputs> | 83 </inputs> |
| 29 <outputs> | 84 <outputs> |
| 30 <data name="meme_output" format="hyphy_results.json" /> | 85 <data name="meme_output" format="hyphy_results.json" /> |
| 86 <data name="meme_md_report" format="markdown" from_work_dir="meme_stdout.md" label="MEME Report (Markdown) for ${tool.name} on ${on_string}" /> | |
| 31 </outputs> | 87 </outputs> |
| 32 <tests> | 88 <tests> |
| 33 <test> | 89 <test expect_num_outputs="2"> |
| 34 <param name="input_file" ftype="fasta" value="meme-in1.fa"/> | 90 <param name="input_file" ftype="fasta" value="meme-in1.fa"/> |
| 35 <param name="input_nhx" ftype="nhx" value="meme-in1.nhx"/> | 91 <param name="input_nhx" ftype="nhx" value="meme-in1.nhx"/> |
| 36 <conditional name="branch_cond"> | 92 <conditional name="branch_cond"> |
| 37 <param name="branch_sel" value="All"/> | 93 <param name="branch_sel" value="All"/> |
| 38 </conditional> | 94 </conditional> |
| 39 <param name="p_value" value="0.1"/> | 95 <param name="p_value" value="0.1"/> |
| 40 <output name="meme_output" file="meme-out1.json" compare="sim_size" delta="25000"/> | 96 <output name="meme_output"> |
| 97 <assert_contents> | |
| 98 <has_text text="fits"/> | |
| 99 <has_text text="branch attributes"/> | |
| 100 <has_text text="Global MG94xREV"/> | |
| 101 </assert_contents> | |
| 102 </output> | |
| 103 <output name="meme_md_report"> | |
| 104 <assert_contents> | |
| 105 <has_text text="### For partition 1 these sites are significant at p <=0.1"/> | |
| 106 </assert_contents> | |
| 107 </output> | |
| 108 </test> | |
| 109 <test expect_num_outputs="2"> | |
| 110 <param name="input_file" ftype="fasta" value="meme-in1.fa"/> | |
| 111 <param name="input_nhx" ftype="nhx" value="meme-in1.nhx"/> | |
| 112 <conditional name="branch_cond"> | |
| 113 <param name="branch_sel" value="Internal"/> | |
| 114 </conditional> | |
| 115 <section name="advanced_options"> | |
| 116 <param name="rates" value="3"/> | |
| 117 </section> | |
| 118 <output name="meme_output"> | |
| 119 <assert_contents> | |
| 120 <has_text text="fits"/> | |
| 121 <has_text text="branch attributes"/> | |
| 122 <has_text text="Mixture distribution weight allocated to negative/neutral evolution component 2"/> | |
| 123 </assert_contents> | |
| 124 </output> | |
| 125 <output name="meme_md_report"> | |
| 126 <assert_contents> | |
| 127 <has_text text=">rates => 3"/> | |
| 128 <has_text text="non-syn rate (beta) distribution, rates : weights"/> | |
| 129 </assert_contents> | |
| 130 </output> | |
| 131 </test> | |
| 132 <test expect_num_outputs="2"> | |
| 133 <param name="input_file" ftype="fasta" value="meme-in1.fa"/> | |
| 134 <param name="input_nhx" ftype="nhx" value="meme-in1.nhx"/> | |
| 135 <section name="advanced_options"> | |
| 136 <param name="impute_states" value="Yes"/> | |
| 137 <conditional name="multiple_hits_conditional"> | |
| 138 <param name="multiple_hits" value="Double"/> | |
| 139 </conditional> | |
| 140 </section> | |
| 141 | |
| 142 <output name="meme_output"> | |
| 143 <assert_contents> | |
| 144 <has_text text="fits"/> | |
| 145 <has_text text="Imputed States"/> | |
| 146 <has_text text="Relative rate estimate for 2-nucleotide substitutions"/> | |
| 147 </assert_contents> | |
| 148 </output> | |
| 149 <output name="meme_md_report"> | |
| 150 <assert_contents> | |
| 151 <has_text text="rate at which 2 nucleotides are changed instantly within a single codon"/> | |
| 152 </assert_contents> | |
| 153 </output> | |
| 41 </test> | 154 </test> |
| 42 </tests> | 155 </tests> |
| 43 <help><![CDATA[ | 156 <help><![CDATA[ |
| 44 MEME: Mixed Effects Model of Evolution | 157 MEME: Mixed Effects Model of Evolution |
| 45 ====================================== | 158 ====================================== |
| 46 | 159 |
| 47 What question does this method answer? | 160 **What question does this method answer?** |
| 48 -------------------------------------- | |
| 49 | 161 |
| 50 Which site(s) in a gene are subject to pervasive or *episodic*, i.e. only on a | 162 Which site(s) in a gene are subject to pervasive or *episodic*, i.e. only on a |
| 51 single lineage or subset of lineages, diversifying selection? | 163 single lineage or subset of lineages, diversifying selection? |
| 52 | 164 |
| 53 Recommended Applications | 165 **Recommended Applications** |
| 54 ------------------------ | |
| 55 | 166 |
| 56 The phenomenon of pervasive selection is generally most prevalent in pathogen evolution and any biological system influenced by evolutionary arms race dynamics | 167 The phenomenon of pervasive selection is generally most prevalent in pathogen evolution and any biological system influenced by evolutionary arms race dynamics |
| 57 (or balancing selection), including adaptive immune escape by viruses. | 168 (or balancing selection), including adaptive immune escape by viruses. |
| 58 MEME is ideally suited to identify sites under positive selection which | 169 MEME is ideally suited to identify sites under positive selection which |
| 59 represent candidate sites subject to strong selective pressures across the entire phylogeny or only on parts of the phylogeny. | 170 represent candidate sites subject to strong selective pressures across the entire phylogeny or only on parts of the phylogeny. |
| 60 | 171 |
| 61 MEME is the sole method in HyPhy for detecting selection at individual sites that considers both pervasive and episodic selection. | 172 MEME is the sole method in HyPhy for detecting selection at individual sites that considers both pervasive and episodic selection. |
| 62 MEME is therefore our recommended method if maximum power is desired. | 173 MEME is therefore our recommended method if maximum power is desired. |
| 63 | 174 |
| 64 | 175 **Methodology** |
| 65 Brief description | 176 |
| 66 ----------------- | 177 MEME (Mixed Effects Model of Evolution) is a powerful statistical method for detecting sites in a coding alignment that have been subject to positive selection. It extends classical fixed-effects likelihood (FEL) models by allowing the non-synonymous substitution rate (dN) to vary from branch to branch at a given site. This "mixed-effects" approach provides increased power to detect episodic selection, where a site may be under positive selection in some lineages but under neutral or purifying selection in others. |
| 67 | 178 |
| 68 MEME (Mixed Effects Model of Evolution) estimates a site-wise synonymous | 179 **The Intuition** |
| 69 (alpha) and a two-category mixture of non-synonymous (beta-, with | 180 |
| 70 proportion p-, and beta+ with proportion [1-p-]) rates, and uses a | 181 Imagine you are studying the evolution of a gene across a group of species. Some sites in that gene might be under constant pressure to change (pervasive selection), while others might only experience this pressure for a short period of time in a specific lineage (episodic selection). For example, a virus might evolve a new protein to escape the host's immune system, but once the host population adapts, the pressure on that protein might disappear. |
| 71 likelihood ratio test to determine if beta+ > α at a site. The | 182 |
| 72 estimates aggregate information over a proportion of branches at a site, | 183 Standard methods that assume a single dN/dS rate across the entire phylogeny might miss this kind of episodic selection. MEME addresses this by modeling the dN/dS ratio at each site as a mixture of two or more rate classes. For each site, MEME infers the probability that it evolves under each rate class on a given branch. This allows the model to identify sites that show evidence of positive selection (dN/dS > 1) even if that selection is confined to a small number of lineages. |
| 73 so the signal is derived from episodic diversification, which is a | 184 |
| 74 combination of strength of selection [effect size] and the proportion of | 185 **The Test** |
| 75 the tree affected. A subset of branches can be selected for testing as | 186 |
| 76 well, in which case an additional (nuisance) parameter will be inferred | 187 For each site, MEME fits a baseline model where dN/dS is constrained to be less than or equal to 1, and an alternative model where a proportion of branches are allowed to have a dN/dS ratio greater than 1. A likelihood ratio test (LRT) is then used to determine if the alternative model provides a significantly better fit to the data. A significant p-value indicates that the site has experienced episodic diversifying selection. |
| 77 -- the non-synonymous rate on branches NOT selected for testing. | 188 |
| 78 | 189 **Input** |
| 79 Input | |
| 80 ----- | |
| 81 | 190 |
| 82 1. A *FASTA* sequence alignment. | 191 1. A *FASTA* sequence alignment. |
| 83 2. A phylogenetic tree in the *Newick* format | 192 2. A phylogenetic tree in the *Newick* format |
| 84 | 193 |
| 85 Note: the names of sequences in the alignment must match the names of the sequences in the tree. | 194 Note: the names of sequences in the alignment must match the names of the sequences in the tree. |
| 86 | 195 |
| 87 | 196 |
| 88 Output | 197 **Output** |
| 89 ------ | |
| 90 | 198 |
| 91 A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf). | 199 A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf). |
| 92 | 200 |
| 93 A custom visualization module for viewing these results is available (see http://vision.hyphy.org/MEME for an example) | 201 A custom visualization module for viewing these results is available (see http://vision.hyphy.org/MEME for an example) |
| 94 | 202 |
| 95 Further reading | 203 **Further reading** |
| 96 --------------- | |
| 97 | 204 |
| 98 http://hyphy.org/methods/selection-methods/#MEME | 205 http://hyphy.org/methods/selection-methods/#MEME |
| 99 | 206 |
| 100 | 207 |
| 101 Tool options | 208 **Tool options** |
| 102 ------------ | |
| 103 :: | 209 :: |
| 104 | 210 |
| 105 | 211 --alignment [required] An in-frame codon alignment in one of the formats supported by HyPhy. |
| 106 --code Which genetic code to use | 212 --tree [conditionally required] A phylogenetic tree (optionally annotated with {}). |
| 213 | |
| 214 --code Which genetic code to use (see tool form for available options). | |
| 107 | 215 |
| 108 --branches Which branches should be tested for selection? | 216 --branches Which branches should be tested for selection? |
| 109 All [default] : test all branches | 217 All [default] : test all branches. |
| 110 | 218 Internal : test only internal branches (suitable for intra-host pathogen evolution for example, where terminal branches may contain polymorphism data). |
| 111 Internal : test only internal branches (suitable for | 219 Leaves: test only terminal (leaf) branches. |
| 112 intra-host pathogen evolution for example, where terminal branches | 220 Unlabeled: if the Newick string is labeled using the {} notation, test only branches without explicit labels (see http://hyphy.org/tutorials/phylotree/). |
| 113 may contain polymorphism data) | 221 Custom : Enter a branch label. |
| 114 | 222 |
| 115 Leaves: test only terminal (leaf) branches | 223 --pvalue The p-value threshold at or below which a site is reported as significant (default: 0.1, range: 0 to 1). |
| 116 | 224 |
| 117 Unlabeled: if the Newick string is labeled using the {} notation, | 225 --resample Perform parametric bootstrap resampling to derive site-level null LRT distributions. |
| 118 test only branches without explicit labels | 226 Warning: This will result in a significantly slower analysis. A value of 0 means no resampling is performed. This parameter specifies the maximum number of replicates per site (default: 0, range: 0 to 1000). |
| 119 (see http://hyphy.org/tutorials/phylotree/) | 227 |
| 120 | 228 --rates The number of omega rate classes to include in the model (default: 2, range: 2 to 4). |
| 121 --pvalue The significance level used to determine significance | 229 |
| 122 | 230 --multiple-hits Include support for multiple nucleotide substitutions. |
| 231 Double : Include branch-specific rates for double nucleotide substitutions. | |
| 232 Double+Triple : Include branch-specific rates for double and triple nucleotide substitutions. | |
| 233 None [default] : Use standard models which permit only single nucleotide changes to occur instantly. | |
| 234 | |
| 235 --site-multihit Estimate multiple hit rates for each site. This option is available only if 'Include support for multiple nucleotide substitutions' is set to 'Double' or 'Double+Triple'. | |
| 236 Estimate [default] : Estimate multiple hit rates. | |
| 237 No : Do not estimate multiple hit rates. | |
| 238 | |
| 239 --impute-states Use site-level model fits to impute likely character states for each sequence (default: No). | |
| 240 | |
| 241 --precision Optimization precision settings for preliminary fits. | |
| 242 Standard [default] | |
| 243 Reduced for faster fitting | |
| 244 | |
| 245 --kill-zero-lengths Automatically delete internal zero-length branches for computational efficiency. | |
| 246 Yes [default] : Automatically delete internal zero-length branches for computational efficiency (will not affect results otherwise). | |
| 247 Constrain : Keep zero-length branches, but constrain their values to 0. | |
| 248 No : Keep all branches. | |
| 249 | |
| 250 --restrict-sites Restrict MEME analysis to a subset of sites. If Yes, allows specifying a subset of sites for analysis. | |
| 251 Yes : Restrict analysis to a subset of sites. | |
| 252 No [default] : Do not restrict analysis to a subset of sites. | |
| 253 | |
| 254 --limit-to-sites Only analyze sites whose 1-based indices match the following list (null to skip). This option is available only if 'Restrict MEME analysis to a subset of sites' is set to 'Yes'. Comma-separated list of site indices. | |
| 255 --save-lf-for-sites For sites whose 1-based indices match the following list, write out likelihood function snapshots (empty string to skip). This option is available only if 'Restrict MEME analysis to a subset of sites' is set to 'Yes'. Comma-separated list of site indices. | |
| 256 | |
| 257 --full-model Perform branch length re-optimization under the full codon model (default: Yes). | |
| 123 | 258 |
| 124 ]]></help> | 259 ]]></help> |
| 125 <expand macro="citations"> | 260 <expand macro="citations"> |
| 126 <citation type="doi">10.1371/journal.pgen.1002764</citation> | 261 <citation type="doi">10.1371/journal.pgen.1002764</citation> |
| 127 </expand> | 262 </expand> |
