Mercurial > repos > iuc > hyphy_meme
view hyphy_meme.xml @ 35:ff2724696a7d draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92
| author | iuc |
|---|---|
| date | Tue, 07 Oct 2025 20:39:19 +0000 |
| parents | 07d7b41c670b |
| children |
line wrap: on
line source
<tool id="hyphy_meme" name="HyPhy-MEME" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Mixed Effects Model of Evolution</description>
    <!--
        Galaxy wrapper for the HyPhy MEME analysis.
        Version/profile tokens, the shared input, genetic code, branch, resample,
        kill_zero_lengths and citation macros, and the command-line tokens used
        below are all imported from macros.xml (not visible in this file).
    -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <!--
        Command template (Cheetah).
        * Every parameter declared inside the "advanced_options" section is
          referenced through its fully qualified path (advanced_options.NAME).
        * Cheetah directives (#if / #end if) each sit on their own line.
        * The site-multihit argument is only emitted when multiple-hit support is
          not "None", mirroring the conditional in the inputs.
        * The limit-to-sites argument is only emitted when the user typed a
          non-empty value; otherwise it is left to the HyPhy default.
          NOTE(review): the parameter label states "null to skip", so the HyPhy
          default is presumed to be "null"; confirm against the HyPhy release in use.
        * NOTE(review): "resample" and "kill_zero_lengths" are produced by macros
          expanded inside the section; their names are taken from the references
          in the original command and should be confirmed against macros.xml.
        * Standard output is captured in meme_stdout.md, which is collected as the
          Markdown report output.
    -->
    <command detect_errors="exit_code"><![CDATA[
        @SYMLINK_FILES@
        @HYPHYMPI@ meme
            --alignment ./$input_file
            @INPUT_TREE@
            --code '$gencodeid'
            @branch_options@
            --pvalue '$p_value'
            --resample $advanced_options.resample
            --rates $advanced_options.rates
            --multiple-hits $advanced_options.multiple_hits_conditional.multiple_hits
            #if str($advanced_options.multiple_hits_conditional.multiple_hits) != "None":
                --site-multihit $advanced_options.multiple_hits_conditional.site_multihit
            #end if
            --impute-states '$advanced_options.impute_states'
            --precision $advanced_options.precision
            --kill-zero-lengths $advanced_options.kill_zero_lengths
            #if str($advanced_options.restrict_sites_conditional.restrict_sites_flag) == "true":
                #if str($advanced_options.restrict_sites_conditional.limit_to_sites).strip():
                    --limit-to-sites '$advanced_options.restrict_sites_conditional.limit_to_sites'
                #end if
                --save-lf-for-sites '$advanced_options.restrict_sites_conditional.save_lf_for_sites'
            #end if
            --output '$meme_output'
            --full-model '$advanced_options.full_model'
            > meme_stdout.md
        @ERRORS@
    ]]></command>
    <inputs>
        <!-- Alignment/tree inputs, genetic code and branch selection come from shared macros. -->
        <expand macro="inputs"/>
        <expand macro="gencode"/>
        <expand macro="branches"/>
        <param argument="--pvalue" name="p_value" type="float" value=".1" min="0" max="1" label="P-value threshold"/>
        <!-- Everything below lives in the "advanced_options" section and is collapsed by default. -->
        <section name="advanced_options" title="Advanced Options" expanded="false">
            <expand macro="resample"/>
            <param argument="--rates" type="integer" value="2" min="2" max="4" label="Number of omega rate classes"/>
            <!-- The per-site multihit choice is only offered when multiple-hit support is enabled. -->
            <conditional name="multiple_hits_conditional">
                <param argument="--multiple-hits" type="select" label="Include support for multiple nucleotide substitutions">
                    <option value="Double">Include branch-specific rates for double nucleotide substitutions</option>
                    <option value="Double+Triple">Include branch-specific rates for double and triple nucleotide substitutions</option>
                    <option value="None" selected="true">Use standard models which permit only single nucleotide changes to occur instantly</option>
                </param>
                <when value="Double">
                    <param argument="--site-multihit" type="select" label="Estimate multiple hit rates for each site">
                        <option value="Estimate" selected="true">Estimate</option>
                        <option value="No">No</option>
                    </param>
                </when>
                <when value="Double+Triple">
                    <param argument="--site-multihit" type="select" label="Estimate multiple hit rates for each site">
                        <option value="Estimate" selected="true">Estimate</option>
                        <option value="No">No</option>
                    </param>
                </when>
                <when value="None"/>
            </conditional>
            <param argument="--impute-states" type="boolean" truevalue="Yes" falsevalue="No" label="Impute likely character states for each sequence"/>
            <param argument="--precision" type="select" label="Optimization precision for preliminary fits">
                <option value="standard">Standard</option>
                <option value="reduced">Reduced for faster fitting</option>
            </param>
            <expand macro="kill_zero_lengths_param"/>
            <!-- "restrict_sites_flag" is a form-only toggle; it gates the two site-list arguments in the command. -->
            <conditional name="restrict_sites_conditional">
                <param name="restrict_sites_flag" type="select" label="Restrict MEME analysis to a subset of sites" help="If Yes, allows specifying a subset of sites for analysis.">
                    <option value="true">Yes</option>
                    <option value="false" selected="true">No</option>
                </param>
                <when value="true">
                    <param argument="--limit-to-sites" type="text" optional="true" label="Only analyze sites whose 1-based indices match the following list (null to skip)" help="Comma-separated list of site indices."/>
                    <param argument="--save-lf-for-sites" type="text" optional="true" label="For sites whose 1-based indices match the following list, write out likelihood function snapshots (empty string to skip)" help="Comma-separated list of site indices."/>
                </when>
                <when value="false"/>
            </conditional>
            <param argument="--full-model" type="boolean" truevalue="Yes" falsevalue="No" checked="true" label="Perform branch length re-optimization under the full codon model"/>
        </section>
    </inputs>
    <outputs>
        <!-- JSON results written through the output argument of the command. -->
        <data name="meme_output" format="hyphy_results.json"/>
        <!-- Captured standard output of the run, collected from the working directory. -->
        <data name="meme_md_report" format="markdown" from_work_dir="meme_stdout.md" label="MEME Report (Markdown) for ${tool.name} on ${on_string}"/>
    </outputs>
    <tests>
        <!-- Test 1: all branches, explicit p-value threshold. -->
        <test expect_num_outputs="2">
            <param name="input_file" ftype="fasta" value="meme-in1.fa"/>
            <param name="input_nhx" ftype="nhx" value="meme-in1.nhx"/>
            <conditional name="branch_cond">
                <param name="branch_sel" value="All"/>
            </conditional>
            <param name="p_value" value="0.1"/>
            <output name="meme_output">
                <assert_contents>
                    <has_text text="fits"/>
                    <has_text text="branch attributes"/>
                    <has_text text="Global MG94xREV"/>
                </assert_contents>
            </output>
            <output name="meme_md_report">
                <assert_contents>
                    <!-- The less-than sign must be written as an entity inside an attribute value. -->
                    <has_text text="### For partition 1 these sites are significant at p &lt;=0.1"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: internal branches only, three omega rate classes. -->
        <test expect_num_outputs="2">
            <param name="input_file" ftype="fasta" value="meme-in1.fa"/>
            <param name="input_nhx" ftype="nhx" value="meme-in1.nhx"/>
            <conditional name="branch_cond">
                <param name="branch_sel" value="Internal"/>
            </conditional>
            <section name="advanced_options">
                <param name="rates" value="3"/>
            </section>
            <output name="meme_output">
                <assert_contents>
                    <has_text text="fits"/>
                    <has_text text="branch attributes"/>
                    <has_text text="Mixture distribution weight allocated to negative/neutral evolution component 2"/>
                </assert_contents>
            </output>
            <output name="meme_md_report">
                <assert_contents>
                    <has_text text="&gt;rates =&gt; 3"/>
                    <has_text text="non-syn rate (beta) distribution, rates : weights"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: state imputation with double-nucleotide substitution support. -->
        <test expect_num_outputs="2">
            <param name="input_file" ftype="fasta" value="meme-in1.fa"/>
            <param name="input_nhx" ftype="nhx" value="meme-in1.nhx"/>
            <section name="advanced_options">
                <param name="impute_states" value="Yes"/>
                <conditional name="multiple_hits_conditional">
                    <param name="multiple_hits" value="Double"/>
                </conditional>
            </section>
            <output name="meme_output">
                <assert_contents>
                    <has_text text="fits"/>
                    <has_text text="Imputed States"/>
                    <has_text text="Relative rate estimate for 2-nucleotide substitutions"/>
                </assert_contents>
            </output>
            <output name="meme_md_report">
                <assert_contents>
                    <has_text text="rate at which 2 nucleotides are changed instantly within a single codon"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
MEME: Mixed Effects Model of Evolution
======================================

**What question does this method answer?**

Which site(s) in a gene are subject to pervasive or *episodic*, i.e. only on a single lineage or subset of lineages, diversifying selection?

**Recommended Applications**

The phenomenon of pervasive selection is generally most prevalent in pathogen evolution and any biological system influenced by evolutionary arms race dynamics (or balancing selection), including adaptive immune escape by viruses. MEME is ideally suited to identify sites under positive selection which represent candidate sites subject to strong selective pressures across the entire phylogeny or only on parts of the phylogeny.

MEME is the sole method in HyPhy for detecting selection at individual sites that considers both pervasive and episodic selection. MEME is therefore our recommended method if maximum power is desired.

**Methodology**

MEME (Mixed Effects Model of Evolution) is a powerful statistical method for detecting sites in a coding alignment that have been subject to positive selection. It extends classical fixed-effects likelihood (FEL) models by allowing the non-synonymous substitution rate (dN) to vary from branch to branch at a given site. This "mixed-effects" approach provides increased power to detect episodic selection, where a site may be under positive selection in some lineages but under neutral or purifying selection in others.

**The Intuition**

Imagine you are studying the evolution of a gene across a group of species. Some sites in that gene might be under constant pressure to change (pervasive selection), while others might only experience this pressure for a short period of time in a specific lineage (episodic selection). For example, a virus might evolve a new protein to escape the host's immune system, but once the host population adapts, the pressure on that protein might disappear. Standard methods that assume a single dN/dS rate across the entire phylogeny might miss this kind of episodic selection.

MEME addresses this by modeling the dN/dS ratio at each site as a mixture of two or more rate classes. For each site, MEME infers the probability that it evolves under each rate class on a given branch. This allows the model to identify sites that show evidence of positive selection (dN/dS > 1) even if that selection is confined to a small number of lineages.

**The Test**

For each site, MEME fits a baseline model where dN/dS is constrained to be less than or equal to 1, and an alternative model where a proportion of branches are allowed to have a dN/dS ratio greater than 1. A likelihood ratio test (LRT) is then used to determine if the alternative model provides a significantly better fit to the data. A significant p-value indicates that the site has experienced episodic diversifying selection.

**Input**

1. A *FASTA* sequence alignment.
2. A phylogenetic tree in the *Newick* format

Note: the names of sequences in the alignment must match the names of the sequences in the tree.

**Output**

A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf).

A custom visualization module for viewing these results is available (see http://vision.hyphy.org/MEME for an example)

**Further reading**

http://hyphy.org/methods/selection-methods/#MEME

**Tool options**

::

    --alignment          [required] An in-frame codon alignment in one of the formats supported by HyPhy.

    --tree               [conditionally required] A phylogenetic tree (optionally annotated with {}).

    --code               Which genetic code to use (see tool form for available options).

    --branches           Which branches should be tested for selection?
                             All [default] : test all branches.
                             Internal : test only internal branches (suitable for intra-host pathogen
                                        evolution for example, where terminal branches may contain
                                        polymorphism data).
                             Leaves: test only terminal (leaf) branches.
                             Unlabeled: if the Newick string is labeled using the {} notation, test only
                                        branches without explicit labels
                                        (see http://hyphy.org/tutorials/phylotree/).
                             Custom : Enter a branch label.

    --pvalue             The significance level used to determine significance (default: 0.1, range: 0 to 1).

    --resample           Perform parametric bootstrap resampling to derive site-level null LRT distributions.
                         Warning: This will result in a significantly slower analysis.
                         A value of 0 means no resampling is performed.
                         This parameter specifies the maximum number of replicates per site
                         (default: 0, range: 0 to 1000).

    --rates              The number of omega rate classes to include in the model (default: 2, range: 2 to 4).

    --multiple-hits      Include support for multiple nucleotide substitutions.
                             Double : Include branch-specific rates for double nucleotide substitutions.
                             Double+Triple : Include branch-specific rates for double and triple nucleotide
                                             substitutions.
                             None [default] : Use standard models which permit only single nucleotide
                                              changes to occur instantly.

    --site-multihit      Estimate multiple hit rates for each site.
                         This option is available only if 'Include support for multiple nucleotide
                         substitutions' is set to 'Double' or 'Double+Triple'.
                             Estimate [default] : Estimate multiple hit rates.
                             No : Do not estimate multiple hit rates.

    --impute-states      Use site-level model fits to impute likely character states for each sequence
                         (default: No).

    --precision          Optimization precision settings for preliminary fits.
                             Standard [default]
                             Reduced for faster fitting

    --kill-zero-lengths  Automatically delete internal zero-length branches for computational efficiency.
                             Yes [default] : Automatically delete internal zero-length branches for
                                             computational efficiency (will not affect results otherwise).
                             Constrain : Keep zero-length branches, but constrain their values to 0.
                             No : Keep all branches.

    --restrict-sites     Restrict MEME analysis to a subset of sites.
                         If Yes, allows specifying a subset of sites for analysis.
                             Yes : Restrict analysis to a subset of sites.
                             No [default] : Do not restrict analysis to a subset of sites.

    --limit-to-sites     Only analyze sites whose 1-based indices match the following list (null to skip).
                         This option is available only if 'Restrict MEME analysis to a subset of sites'
                         is set to 'Yes'.
                         Comma-separated list of site indices.

    --save-lf-for-sites  For sites whose 1-based indices match the following list, write out likelihood
                         function snapshots (empty string to skip).
                         This option is available only if 'Restrict MEME analysis to a subset of sites'
                         is set to 'Yes'.
                         Comma-separated list of site indices.

    --full-model         Perform branch length re-optimization under the full codon model (default: Yes).
    ]]></help>
    <!-- The shared citations macro is extended with the MEME method paper. -->
    <expand macro="citations">
        <citation type="doi">10.1371/journal.pgen.1002764</citation>
    </expand>
</tool>
