hyphy_meme: hyphy_meme.xml comparison

comparison hyphy_meme.xml @ 35:ff2724696a7d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92

author	iuc
date	Tue, 07 Oct 2025 20:39:19 +0000
parents	07d7b41c670b
children

comparison

equal deleted inserted replaced

-:07d7b41c670b
+:ff2724696a7d
 @INPUT_TREE@
 --code '$gencodeid'
 @branch_options@
 --pvalue '$p_value'
 --resample $resample
+--rates $rates
+--multiple-hits $multiple_hits_conditional.multiple_hits
+#if $multiple_hits_conditional.multiple_hits != "None":
+--site-multihit $multiple_hits_conditional.site_multihit
+#end if
+--impute-states '$impute_states'
+--precision $precision
+--kill-zero-lengths $kill_zero_lengths
+#if $restrict_sites_conditional.restrict_sites_flag == "true":
+--limit-to-sites '$restrict_sites_conditional.limit_to_sites'
+--save-lf-for-sites '$restrict_sites_conditional.save_lf_for_sites'
+#end if
 --output '$meme_output'
 --full-model '$full_model'
+> meme_stdout.md
 @ERRORS@
 ]]></command>
 <inputs>
 <expand macro="inputs"/>
 <expand macro="gencode"/>
 <expand macro="branches"/>
 <param argument="--pvalue" name="p_value" type="float" value=".1" min="0" max="1" label="P-value threshold"/>
-<expand macro="resample"/>
+<section name="advanced_options" title="Advanced Options" expanded="false">
-<param argument="--full-model" type="boolean" truevalue="Yes" falsevalue="No" label="Perform branch length re-optimization under the full codon model" />
+<expand macro="resample"/>
+<param argument="--rates" type="integer" value="2" min="2" max="4" label="Number of omega rate classes"/>
+<conditional name="multiple_hits_conditional">
+<param argument="--multiple-hits" type="select" label="Include support for multiple nucleotide substitutions">
+<option value="Double">Include branch-specific rates for double nucleotide substitutions</option>
+<option value="Double+Triple">Include branch-specific rates for double and triple nucleotide substitutions</option>
+<option value="None" selected="true">Use standard models which permit only single nucleotide changes to occur instantly</option>
+</param>
+<when value="Double">
+<param argument="--site-multihit" type="select" label="Estimate multiple hit rates for each site">
+<option value="Estimate" selected="true">Estimate</option>
+<option value="No">No</option>
+</param>
+</when>
+<when value="Double+Triple">
+<param argument="--site-multihit" type="select" label="Estimate multiple hit rates for each site">
+<option value="Estimate" selected="true">Estimate</option>
+<option value="No">No</option>
+</param>
+</when>
+<when value="None">
+</when>
+</conditional>
+<param argument="--impute-states" type="boolean" truevalue="Yes" falsevalue="No" label="Impute likely character states for each sequence"/>
+<param argument="--precision" type="select" label="Optimization precision for preliminary fits">
+<option value="standard">Standard</option>
+<option value="reduced">Reduced for faster fitting</option>
+</param>
+<expand macro="kill_zero_lengths_param"/>
+<conditional name="restrict_sites_conditional">
+<param name="restrict_sites_flag" type="select" label="Restrict MEME analysis to a subset of sites" help="If Yes, allows specifying a subset of sites for analysis.">
+<option value="true">Yes</option>
+<option value="false" selected="true">No</option>
+</param>
+<when value="true">
+<param argument="--limit-to-sites" type="text" optional="true" label="Only analyze sites whose 1-based indices match the following list (null to skip)" help="Comma-separated list of site indices."/>
+<param argument="--save-lf-for-sites" type="text" optional="true" label="For sites whose 1-based indices match the following list, write out likelihood function snapshots (empty string to skip)" help="Comma-separated list of site indices."/>
+</when>
+<when value="false">
+</when>
+</conditional>
+<param argument="--full-model" type="boolean" truevalue="Yes" falsevalue="No" checked="true" label="Perform branch length re-optimization under the full codon model" />
+</section>
 </inputs>
 <outputs>
 <data name="meme_output" format="hyphy_results.json" />
+<data name="meme_md_report" format="markdown" from_work_dir="meme_stdout.md" label="MEME Report (Markdown) for ${tool.name} on ${on_string}" />
 </outputs>
 <tests>
-<test>
+<test expect_num_outputs="2">
 <param name="input_file" ftype="fasta" value="meme-in1.fa"/>
 <param name="input_nhx" ftype="nhx" value="meme-in1.nhx"/>
 <conditional name="branch_cond">
 <param name="branch_sel" value="All"/>
 </conditional>
 <param name="p_value" value="0.1"/>
-<output name="meme_output" file="meme-out1.json" compare="sim_size" delta="25000"/>
+<output name="meme_output">
+<assert_contents>
+<has_text text="fits"/>
+<has_text text="branch attributes"/>
+<has_text text="Global MG94xREV"/>
+</assert_contents>
+</output>
+<output name="meme_md_report">
+<assert_contents>
+<has_text text="### For partition 1 these sites are significant at p &lt;=0.1"/>
+</assert_contents>
+</output>
+</test>
+<test expect_num_outputs="2">
+<param name="input_file" ftype="fasta" value="meme-in1.fa"/>
+<param name="input_nhx" ftype="nhx" value="meme-in1.nhx"/>
+<conditional name="branch_cond">
+<param name="branch_sel" value="Internal"/>
+</conditional>
+<section name="advanced_options">
+<param name="rates" value="3"/>
+</section>
+<output name="meme_output">
+<assert_contents>
+<has_text text="fits"/>
+<has_text text="branch attributes"/>
+<has_text text="Mixture distribution weight allocated to negative/neutral evolution component 2"/>
+</assert_contents>
+</output>
+<output name="meme_md_report">
+<assert_contents>
+<has_text text=">rates => 3"/>
+<has_text text="non-syn rate (beta) distribution, rates : weights"/>
+</assert_contents>
+</output>
+</test>
+<test expect_num_outputs="2">
+<param name="input_file" ftype="fasta" value="meme-in1.fa"/>
+<param name="input_nhx" ftype="nhx" value="meme-in1.nhx"/>
+<section name="advanced_options">
+<param name="impute_states" value="Yes"/>
+<conditional name="multiple_hits_conditional">
+<param name="multiple_hits" value="Double"/>
+</conditional>
+</section>
+<output name="meme_output">
+<assert_contents>
+<has_text text="fits"/>
+<has_text text="Imputed States"/>
+<has_text text="Relative rate estimate for 2-nucleotide substitutions"/>
+</assert_contents>
+</output>
+<output name="meme_md_report">
+<assert_contents>
+<has_text text="rate at which 2 nucleotides are changed instantly within a single codon"/>
+</assert_contents>
+</output>
 </test>
 </tests>
 <help><![CDATA[
 MEME: Mixed Effects Model of Evolution
 ======================================
-What question does this method answer?
+**What question does this method answer?**
---------------------------------------
 Which site(s) in a gene are subject to pervasive or *episodic*, i.e. only on a
 single lineage or subset of lineages, diversifying selection?
-Recommended Applications
+**Recommended Applications**
-------------------------
 The phenomenon of pervasive selection is generally most prevalent in pathogen evolution and any biological system influenced by evolutionary arms race dynamics
 (or balancing selection), including adaptive immune escape by viruses.
 MEME is ideally suited to identify sites under positive selection which
 represent candidate sites subject to strong selective pressures across the entire phylogeny or only on parts of the phylogeny.
 MEME is the sole method in HyPhy for detecting selection at individual sites that considers both pervasive and episodic selection.
 MEME is therefore our recommended method if maximum power is desired.
+**Methodology**
-Brief description
------------------
+MEME (Mixed Effects Model of Evolution) is a powerful statistical method for detecting sites in a coding alignment that have been subject to positive selection. It extends classical fixed-effects likelihood (FEL) models by allowing the non-synonymous substitution rate (dN) to vary from branch to branch at a given site. This "mixed-effects" approach provides increased power to detect episodic selection, where a site may be under positive selection in some lineages but under neutral or purifying selection in others.
-MEME (Mixed Effects Model of Evolution) estimates a site-wise synonymous
+**The Intuition**
-(alpha) and a two-category mixture of non-synonymous (beta-, with
-proportion p-, and beta+ with proportion [1-p-]) rates, and uses a
+Imagine you are studying the evolution of a gene across a group of species. Some sites in that gene might be under constant pressure to change (pervasive selection), while others might only experience this pressure for a short period of time in a specific lineage (episodic selection). For example, a virus might evolve a new protein to escape the host's immune system, but once the host population adapts, the pressure on that protein might disappear.
-likelihood ratio test to determine if beta+ > &alpha; at a site. The
-estimates aggregate information over a proportion of branches at a site,
+Standard methods that assume a single dN/dS rate across the entire phylogeny might miss this kind of episodic selection. MEME addresses this by modeling the dN/dS ratio at each site as a mixture of two or more rate classes. For each site, MEME infers the probability that it evolves under each rate class on a given branch. This allows the model to identify sites that show evidence of positive selection (dN/dS > 1) even if that selection is confined to a small number of lineages.
-so the signal is derived from episodic diversification, which is a
-combination of strength of selection [effect size] and the proportion of
+**The Test**
-the tree affected. A subset of branches can be selected for testing as
-well, in which case an additional (nuisance) parameter will be inferred
+For each site, MEME fits a baseline model where dN/dS is constrained to be less than or equal to 1, and an alternative model where a proportion of branches are allowed to have a dN/dS ratio greater than 1. A likelihood ratio test (LRT) is then used to determine if the alternative model provides a significantly better fit to the data. A significant p-value indicates that the site has experienced episodic diversifying selection.
--- the non-synonymous rate on branches NOT selected for testing.
+**Input**
-Input
------
 1. A *FASTA* sequence alignment.
 2. A phylogenetic tree in the *Newick* format.
 Note: the names of sequences in the alignment must match the names of the sequences in the tree.
-Output
+**Output**
-------
 A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf).
 A custom visualization module for viewing these results is available (see http://vision.hyphy.org/MEME for an example).
-Further reading
+**Further reading**
----------------
 http://hyphy.org/methods/selection-methods/#MEME
-Tool options
+**Tool options**
-------------
 ::
+--alignment         [required] An in-frame codon alignment in one of the formats supported by HyPhy.
---code              Which genetic code to use
+--tree              [conditionally required] A phylogenetic tree (optionally annotated with {}).
+--code              Which genetic code to use (see tool form for available options).
 --branches          Which branches should be tested for selection?
-All [default] : test all branches
+All [default] : test all branches.
+Internal : test only internal branches (suitable for intra-host pathogen evolution for example, where terminal branches may contain polymorphism data).
-Internal : test only internal branches (suitable for
+Leaves: test only terminal (leaf) branches.
-intra-host pathogen evolution for example, where terminal branches
+Unlabeled: if the Newick string is labeled using the {} notation, test only branches without explicit labels (see http://hyphy.org/tutorials/phylotree/).
-may contain polymorphism data)
+Custom : Enter a branch label.
-Leaves: test only terminal (leaf) branches
+--pvalue            The p-value threshold at or below which a site is reported as significant (default: 0.1, range: 0 to 1).
-Unlabeled: if the Newick string is labeled using the {} notation,
+--resample          Perform parametric bootstrap resampling to derive site-level null LRT distributions.
-test only branches without explicit labels
+Warning: This will result in a significantly slower analysis. A value of 0 means no resampling is performed. This parameter specifies the maximum number of replicates per site (default: 0, range: 0 to 1000).
-(see http://hyphy.org/tutorials/phylotree/)
+--rates             The number of omega rate classes to include in the model (default: 2, range: 2 to 4).
---pvalue           The significance level used to determine significance
+--multiple-hits     Include support for multiple nucleotide substitutions.
+Double : Include branch-specific rates for double nucleotide substitutions.
+Double+Triple : Include branch-specific rates for double and triple nucleotide substitutions.
+None [default] : Use standard models which permit only single nucleotide changes to occur instantly.
+--site-multihit     Estimate multiple hit rates for each site. This option is available only if 'Include support for multiple nucleotide substitutions' is set to 'Double' or 'Double+Triple'.
+Estimate [default] : Estimate multiple hit rates.
+No : Do not estimate multiple hit rates.
+--impute-states     Use site-level model fits to impute likely character states for each sequence (default: No).
+--precision         Optimization precision settings for preliminary fits.
+Standard [default]
+Reduced for faster fitting
+--kill-zero-lengths Automatically delete internal zero-length branches for computational efficiency.
+Yes [default] : Automatically delete internal zero-length branches for computational efficiency (will not affect results otherwise).
+Constrain : Keep zero-length branches, but constrain their values to 0.
+No : Keep all branches.
+--restrict-sites    Tool form option (not itself passed to HyPhy): restrict MEME analysis to a subset of sites. If Yes, allows specifying a subset of sites for analysis via --limit-to-sites and --save-lf-for-sites.
+Yes : Restrict analysis to a subset of sites.
+No [default] : Do not restrict analysis to a subset of sites.
+--limit-to-sites    Only analyze sites whose 1-based indices match the following list (null to skip). This option is available only if 'Restrict MEME analysis to a subset of sites' is set to 'Yes'. Comma-separated list of site indices.
+--save-lf-for-sites For sites whose 1-based indices match the following list, write out likelihood function snapshots (empty string to skip). This option is available only if 'Restrict MEME analysis to a subset of sites' is set to 'Yes'. Comma-separated list of site indices.
+--full-model        Perform branch length re-optimization under the full codon model (default: Yes).
 ]]></help>
 <expand macro="citations">
 <citation type="doi">10.1371/journal.pgen.1002764</citation>
 </expand>

Mercurial > repos > iuc > hyphy_meme

comparison hyphy_meme.xml @ 35:ff2724696a7d draft default tip