diff hyphy_prime.xml @ 27:9e63dd9b1a1b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92
author iuc
date Tue, 07 Oct 2025 20:37:17 +0000
parents 7f13d0cd42e6
children
line wrap: on
line diff
--- a/hyphy_prime.xml	Thu Mar 02 15:03:27 2023 +0000
+++ b/hyphy_prime.xml	Tue Oct 07 20:37:17 2025 +0000
@@ -12,119 +12,232 @@
             @INPUT_TREE@
             --code '$gencodeid'
             @branch_options@
+            #if str($prop_source.prop_source_type) == 'builtin'
+                --property-set '$prop_source.prop_set'
+            #else
+                --property-set "Custom"
+                --property-file '$prop_source.property_file'
+            #end if
             --pvalue '$p_value'
-            --properties '$prop_set'
-            --impute-states '$impute_states'
+            --impute-states '$advanced_options.impute_states'
+            #if str($advanced_options.save_intermediate) == "true":
+                --intermediate-fits 'intermediate_fits.json'
+            #end if
+            --kill-zero-lengths '$advanced_options.kill_zero_lengths'
             --output '$prime_output'
+            > '$prime_md_report'
         @ERRORS@
     ]]></command>
     <inputs>
         <expand macro="inputs"/>
         <expand macro="gencode"/>
         <expand macro="branches"/>
-        <param argument="--properties" name="prop_set" type="select" label="Biochemical properties to use">
-            <option value="Atchley">Atchley</option>
-            <option value="LCAP">LCAP</option>
-        </param>
+        <conditional name="prop_source"> <!-- selects a built-in property set or a user-supplied JSON file; read in the command block as $prop_source.* -->
+            <param name="prop_source_type" type="select" label="Source of amino-acid properties" help="Select the source of amino-acid properties.">
+                <option value="builtin" selected="true">Use a built-in property set</option>
+                <option value="custom">Provide a custom property file</option>
+            </param>
+            <when value="builtin">
+                <param argument="--property-set" name="prop_set" type="select" label="Biochemical properties to use"> <!-- argument matches the property-set flag emitted by the command block -->
+                    <option value="Atchley">Atchley</option>
+                    <option value="LCAP">LCAP</option>
+                    <option value="Random-2">Two random properties</option>
+                    <option value="Random-3">Three random properties</option>
+                    <option value="Random-4">Four random properties</option>
+                    <option value="Random-5">Five random properties</option>
+                </param>
+            </when>
+            <when value="custom">
+                <param argument="--property-file" type="data" format="json" label="Custom property file (JSON)" help="JSON dataset describing the amino-acid properties to test; passed to HyPhy via --property-file."/> <!-- referenced in the command block as $prop_source.property_file -->
+            </when>
+        </conditional>
         <param argument="--pvalue" name="p_value" type="float" value=".1" min="0" max="1" label="P-value threshold"/>
-        <param argument="--impute-states" type="boolean" truevalue="Yes" falsevalue="No" label="Use site-level model fits to impute likely character states for each sequence"/>
+        <section name="advanced_options" title="Advanced Options" expanded="false"> <!-- collapsed group of optional settings -->
+            <param argument="--impute-states" type="boolean" truevalue="Yes" falsevalue="No" checked="false" label="Use site-level model fits to impute likely character states for each sequence"/>
+            <param name="save_intermediate" type="hidden" value="false"/> <!-- hidden, fixed to "false": the command block keys its intermediate-fits option on this value, and no output dataset is declared for intermediate_fits.json -->
+            <expand macro="kill_zero_lengths_param"/>
+        </section>
     </inputs>
     <outputs>
-        <data name="prime_output" format="hyphy_results.json" />
+        <data name="prime_output" format="hyphy_results.json" label="PRIME results on ${on_string}"/>
+        <data name="prime_md_report" format="markdown" label="PRIME Report (Markdown) for ${tool.name} on ${on_string}"/> <!-- filled by the command block's stdout redirect into $prime_md_report; nothing visible writes prime_stdout.md, so no from_work_dir is declared -->
     </outputs>
     <tests>
-        <test>
-            <param name="input_file" ftype="fasta" value="prime-in1.fa"/>
-            <param name="input_nhx" ftype="nhx" value="prime-in1.nhx"/>
+        <test expect_num_outputs="2"> <!-- Atchley property set, all branches, no imputation -->
+            <param name="input_file" value="prime-in1.fa"/>
+            <param name="input_nhx" value="prime-in1.nhx"/>
+            <conditional name="branch_cond">
+                <param name="branch_sel" value="All"/>
+            </conditional>
+            <conditional name="prop_source">
+                <param name="prop_source_type" value="builtin"/>
+                <param name="prop_set" value="Atchley"/>
+            </conditional>
+            <param name="p_value" value="0.1"/>
+            <section name="advanced_options">
+                <param name="impute_states" value="false"/>
+                <param name="save_intermediate" value="false"/>
+                <param name="kill_zero_lengths" value="Yes"/>
+            </section>
+            <output name="prime_output">
+                <assert_contents>
+                    <has_text text="p-value for non-zero effect of Factor III volume"/>
+                    <has_text text="substitutions"/>
+                </assert_contents>
+            </output>
+            <output name="prime_md_report">
+                <assert_contents>
+                    <has_text text="Using the following set of **5** properties"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2"> <!-- LCAP property set, all branches, no imputation -->
+            <param name="input_file" value="prime-in1.fa"/>
+            <param name="input_nhx" value="prime-in1.nhx"/>
             <conditional name="branch_cond">
                 <param name="branch_sel" value="All"/>
             </conditional>
+            <conditional name="prop_source">
+                <param name="prop_source_type" value="builtin"/>
+                <param name="prop_set" value="LCAP"/>
+            </conditional>
             <param name="p_value" value="0.1"/>
-            <param name="prop_set" value="Atchley"/>
-            <output name="prime_output" file="prime-out1.json" compare="sim_size"/>
+            <section name="advanced_options">
+                <param name="impute_states" value="false"/>
+                <param name="save_intermediate" value="false"/>
+                <param name="kill_zero_lengths" value="Yes"/>
+            </section>
+            <output name="prime_output">
+                <assert_contents>
+                    <has_text text="Log likelihood when there is no effect of Iso-electric Point"/>
+                    <has_text text="substitutions"/>
+                </assert_contents>
+            </output>
+            <output name="prime_md_report">
+                <assert_contents>
+                    <has_text text="* Iso-electric Point"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2"> <!-- Atchley property set, internal branches only -->
+            <param name="input_file" value="prime-in1.fa"/>
+            <param name="input_nhx" value="prime-in1.nhx"/>
+            <conditional name="branch_cond">
+                <param name="branch_sel" value="Internal"/>
+            </conditional>
+            <conditional name="prop_source">
+                <param name="prop_source_type" value="builtin"/>
+                <param name="prop_set" value="Atchley"/>
+            </conditional>
+            <param name="p_value" value="0.1"/>
+            <section name="advanced_options">
+                <param name="impute_states" value="false"/>
+                <param name="save_intermediate" value="false"/>
+                <param name="kill_zero_lengths" value="Yes"/>
+            </section>
+            <output name="prime_output">
+                <assert_contents>
+                    <has_text text='"tested"'/>
+                    <has_text text="p-value for non-zero effect of Factor III volume"/>
+                </assert_contents>
+            </output>
+            <output name="prime_md_report">
+                <assert_contents>
+                    <has_text text="Selected 7 branches"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2"> <!-- Random-2 property set with state imputation switched on -->
+            <param name="input_file" value="prime-in1.fa"/>
+            <param name="input_nhx" value="prime-in1.nhx"/>
+            <conditional name="branch_cond">
+                <param name="branch_sel" value="All"/>
+            </conditional>
+            <conditional name="prop_source">
+                <param name="prop_source_type" value="builtin"/>
+                <param name="prop_set" value="Random-2"/>
+            </conditional>
+            <param name="p_value" value="0.1"/>
+            <section name="advanced_options">
+                <param name="impute_states" value="true"/>
+                <param name="save_intermediate" value="false"/>
+                <param name="kill_zero_lengths" value="Yes"/>
+            </section>
+            <output name="prime_output">
+                <assert_contents>
+                    <has_text text='"Imputed States"'/>
+                    <has_text text="p-value for non-zero effect of Random Factor 2"/>
+                </assert_contents>
+            </output>
+            <output name="prime_md_report">
+                <assert_contents>
+                    <has_text text="impute-states => Yes"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[
-PRIME: Property Informed Model of Evolution
-===========================================
-
-What question does this method answer?
---------------------------------------
-
-Does evolution at specific sites in a coding alignment preserve or alter some biochemical properties?
+**What question does this method answer?**
 
-Recommended Applications
-------------------------
-
-Identify biochemical evolutionary constraints or changes with site level resolution: e.g. site 23 is
-evolving to conserve residue polarity, but alter it's volume.
-
-
-Brief description
------------------
+Does evolution at specific sites in a coding alignment preserve or alter a set of pre-defined biochemical properties?
 
-Most methods of coding sequence analysis do not take direct account of the fact that
-the rate at which amino-acids are exchanged is different depending on the amino-acids.
-While this seems obvious (e.g. radical changes should happen slower), there are many technical reasons
-for why the standard assumption of "one-rate for all residues" holds.
-
+**Recommended Applications**
 
-Given a set of N amino-acid properties, fit a site-level model where non-synonymous rates
-depend on how much a non-synonymous substitution changes the properties
-of the residue, beta (X,Y) = Exp (log_omega - lambda_1 * diff_1 (X,Y )-
-lambda_2 * diff_2 (X,Y) -...). When lambda_k > 0, changes in property k
-are disfavored and when lambda_k < 0 -- they are promoted. At each site,
-N+1 tests are performed (one for each property, and an omnibus test).
+- Identify biochemical evolutionary constraints or changes with site-level resolution (e.g. site 23 is evolving to conserve residue polarity, but alter its volume).
 
-Input
------
-
-1. A *FASTA* sequence alignment.
-2. A phylogenetic tree in the *Newick* format
-
-Note: the names of sequences in the alignment must match the names of the sequences in the tree.
-
-
-Output
+Method
 ------
 
-A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf).
+**Background: Limitations of standard dN/dS models**
+
+Standard models for detecting natural selection in coding sequences use the dN/dS ratio (ω) to quantify selection pressure. These models typically assume that all non-synonymous substitutions have the same rate, regardless of the specific amino acid change. This is a simplification, as substitutions between biochemically similar amino acids (e.g., Leucine to Isoleucine) are expected to occur more frequently than substitutions between dissimilar ones (e.g., Arginine to Cysteine).
 
-A custom visualization module for viewing these results will soon be available at http://vision.hyphy.org/
+**PRIME: A Property-Informed Model**
 
-Further reading
----------------
+PRIME (PRoperty-Informed Models of Evolution) extends the standard dN/dS framework by incorporating the biochemical properties of amino acids directly into the substitution model. Instead of a single rate for all non-synonymous changes, PRIME models these rates as a function of the changes in specific biochemical properties between the original and the new amino acid.
+
+**The Intuition**
 
-http://hyphy.org/methods/selection-methods/#PRIME
+The core idea is that the fitness cost or benefit of a mutation is often related to how it alters the biochemical characteristics of the resulting protein. PRIME formalizes this by modeling the non-synonymous substitution rate between amino acid `i` and `j` as a function of a baseline dN/dS ratio (ω) and a set of property-specific parameters (λ).
 
+For each biochemical property `p` (e.g., volume, polarity), the model includes a parameter `λ_p`. This parameter quantifies the extent to which evolution at a given site favors or disfavors changes in that property.
 
-Tool options
-------------
-::
+-   If **λ_p > 0**, changes in property `p` are penalized. This indicates **conservative selection** with respect to that property. For example, a large positive λ for "volume" means that substitutions that significantly alter the amino acid's volume are selected against.
+-   If **λ_p < 0**, changes in property `p` are favored. This indicates **radical selection** with respect to that property. For example, a large negative λ for "charge" means that substitutions that change the amino acid's charge are selected for.
 
+**The Test**
 
-    --code              Which genetic code to use
+For each site in the alignment, PRIME performs a likelihood ratio test (LRT) to determine if this more complex, property-informed model is a significantly better fit to the data than a standard dN/dS model. A significant p-value for a specific property suggests that the evolution at that site has been shaped by selection to conserve or radically alter that biochemical property. An omnibus test is also performed to assess the overall significance of all properties combined.
+
+**Input**
 
-    --branches          Which branches should be tested for selection?
-                            All [default] : test all branches
+- **Sequence Alignment:** An in-frame codon alignment in FASTA or NEXUS format.
+- **Phylogenetic Tree:** A phylogenetic tree in Newick format. The names of the sequences in the alignment must match the names of the tips in the tree.
 
-                            Internal : test only internal branches (suitable for
-                            intra-host pathogen evolution for example, where terminal branches
-                            may contain polymorphism data)
-
-                            Leaves: test only terminal (leaf) branches
+**Tool Options**
 
-                            Unlabeled: if the Newick string is labeled using the {} notation,
-                            test only branches without explicit labels
-                            (see http://hyphy.org/tutorials/phylotree/)
-
-     --pvalue           The significance level used to determine significance
+- **Genetic code:** The genetic code to use for translation.
+- **Branches to test:** Select which branches of the tree to include in the analysis (All, Internal, or Leaves).
+- **Source of amino-acid properties:**
+    - **Use a built-in property set:** Choose from a list of pre-defined sets of amino-acid properties.
+        - `Atchley`: Five properties derived from a factor analysis of 500 amino-acid properties.
+        - `LCAP`: Five properties from the LCAP model of Conant and Stadler.
+        - `Random-*`: Sets of 2, 3, 4, or 5 random properties for null hypothesis testing.
+    - **Provide a custom property file:** Supply your own set of properties in a JSON file.
+- **P-value threshold:** The significance level for the likelihood ratio test.
+- **Impute states:** Use the fitted model to infer the most likely character states at each internal node of the tree.
+- **Save intermediate model fits:** Save the parameter estimates from the initial model fits to a separate JSON file (this setting is fixed by the wrapper and is not shown in the tool form).
+- **Kill zero-length branches:** Automatically remove internal branches of length zero for computational efficiency.
 
-     --properties       Which property set to use
-                            Atchley : Use the five properties derived from a factor analysis of 500 amino-acid properties [Table 2 in PNAS (2005) 102(18) 6395-6400 doi: 10.1073/pnas.0408677102]
-                            LCAP: Use the five properties defined in the Conant and Stadler LCAP model [Mol Biol Evol (2009) 26 (5): 1155-1161. doi: 10.1093/molbev/msp031]
+**Output**
+
+- A JSON file with the detailed analysis results, including site-level p-values and parameter estimates.
+- A Markdown report summarizing the analysis and results.
 
+**Further reading**
 
+- http://hyphy.org/methods/selection-methods/#PRIME
+- http://hyphy.org/resources/json-fields.pdf
     ]]></help>
     <expand macro="citations" />
 </tool>
\ No newline at end of file