comparison hyphy_prime.xml @ 27:9e63dd9b1a1b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92
author iuc
date Tue, 07 Oct 2025 20:37:17 +0000
parents 7f13d0cd42e6
children
comparison
equal deleted inserted replaced
26:7f13d0cd42e6 27:9e63dd9b1a1b
10 @HYPHYMPI@ prime 10 @HYPHYMPI@ prime
11 --alignment ./$input_file 11 --alignment ./$input_file
12 @INPUT_TREE@ 12 @INPUT_TREE@
13 --code '$gencodeid' 13 --code '$gencodeid'
14 @branch_options@ 14 @branch_options@
15 #if $prop_source.prop_source_type == "builtin":
16 --property-set '$prop_source.prop_set'
17 #else:
18 --property-set "Custom"
19 --property-file '$prop_source.property_file'
20 #end if
15 --pvalue '$p_value' 21 --pvalue '$p_value'
16 --properties '$prop_set'
17 --impute-states '$impute_states' 22 --impute-states '$impute_states'
23 #if $save_intermediate:
24 --intermediate-fits 'intermediate_fits.json'
25 #end if
26 --kill-zero-lengths '$kill_zero_lengths'
18 --output '$prime_output' 27 --output '$prime_output'
28 > '$prime_md_report'
19 @ERRORS@ 29 @ERRORS@
20 ]]></command> 30 ]]></command>
21 <inputs> 31 <inputs>
22 <expand macro="inputs"/> 32 <expand macro="inputs"/>
23 <expand macro="gencode"/> 33 <expand macro="gencode"/>
24 <expand macro="branches"/> 34 <expand macro="branches"/>
25 <param argument="--properties" name="prop_set" type="select" label="Biochemical properties to use"> 35 <conditional name="prop_source">
26 <option value="Atchley">Atchley</option> 36 <param name="prop_source_type" type="select" label="Source of amino-acid properties" help="Select the source of amino-acid properties.">
27 <option value="LCAP">LCAP</option> 37 <option value="builtin" selected="true">Use a built-in property set</option>
28 </param> 38 <option value="custom">Provide a custom property file</option>
39 </param>
40 <when value="builtin">
41 <param argument="--properties" name="prop_set" type="select" label="Biochemical properties to use">
42 <option value="Atchley">Atchley</option>
43 <option value="LCAP">LCAP</option>
44 <option value="Random-2">Two random properties</option>
45 <option value="Random-3">Three random properties</option>
46 <option value="Random-4">Four random properties</option>
47 <option value="Random-5">Five random properties</option>
48 </param>
49 </when>
50 <when value="custom">
51 <param argument="--property-file" type="data" format="json" label="Custom property file (JSON)"/>
52 </when>
53 </conditional>
29 <param argument="--pvalue" name="p_value" type="float" value=".1" min="0" max="1" label="P-value threshold"/> 54 <param argument="--pvalue" name="p_value" type="float" value=".1" min="0" max="1" label="P-value threshold"/>
30 <param argument="--impute-states" type="boolean" truevalue="Yes" falsevalue="No" label="Use site-level model fits to impute likely character states for each sequence"/> 55 <section name="advanced_options" title="Advanced Options" expanded="false">
56 <param argument="--impute-states" type="boolean" truevalue="Yes" falsevalue="No" checked="false" label="Use site-level model fits to impute likely character states for each sequence"/>
57 <param name="save_intermediate" type="hidden" value="false"/>
58 <expand macro="kill_zero_lengths_param"/>
59 </section>
31 </inputs> 60 </inputs>
32 <outputs> 61 <outputs>
33 <data name="prime_output" format="hyphy_results.json" /> 62 <data name="prime_output" format="hyphy_results.json" label="PRIME results on ${on_string}"/>
63 <data name="prime_md_report" format="markdown" from_work_dir="prime_stdout.md" label="PRIME Report (Markdown) for ${tool.name} on ${on_string}" />
34 </outputs> 64 </outputs>
35 <tests> 65 <tests>
36 <test> 66 <test expect_num_outputs="2">
37 <param name="input_file" ftype="fasta" value="prime-in1.fa"/> 67 <param name="input_file" value="prime-in1.fa"/>
38 <param name="input_nhx" ftype="nhx" value="prime-in1.nhx"/> 68 <param name="input_nhx" value="prime-in1.nhx"/>
39 <conditional name="branch_cond"> 69 <conditional name="branch_cond">
40 <param name="branch_sel" value="All"/> 70 <param name="branch_sel" value="All"/>
41 </conditional> 71 </conditional>
42 <param name="p_value" value="0.1"/> 72 <conditional name="prop_source">
43 <param name="prop_set" value="Atchley"/> 73 <param name="prop_source_type" value="builtin"/>
44 <output name="prime_output" file="prime-out1.json" compare="sim_size"/> 74 <param name="prop_set" value="Atchley"/>
75 </conditional>
76 <param name="p_value" value="0.1"/>
77 <section name="advanced_options">
78 <param name="impute_states" value="false"/>
79 <param name="save_intermediate" value="false"/>
80 <param name="kill_zero_lengths" value="Yes"/>
81 </section>
82 <output name="prime_output">
83 <assert_contents>
84 <has_text text='p-value for non-zero effect of Factor III volume'/>
85 <has_text text='substitutions'/>
86 </assert_contents>
87 </output>
88 <output name="prime_md_report">
89 <assert_contents>
90 <has_text text="Using the following set of **5** properties"/>
91 </assert_contents>
92 </output>
93 </test>
94 <test expect_num_outputs="2">
95 <param name="input_file" value="prime-in1.fa"/>
96 <param name="input_nhx" value="prime-in1.nhx"/>
97 <conditional name="branch_cond">
98 <param name="branch_sel" value="All"/>
99 </conditional>
100 <conditional name="prop_source">
101 <param name="prop_source_type" value="builtin"/>
102 <param name="prop_set" value="LCAP"/>
103 </conditional>
104 <param name="p_value" value="0.1"/>
105 <section name="advanced_options">
106 <param name="impute_states" value="false"/>
107 <param name="save_intermediate" value="false"/>
108 <param name="kill_zero_lengths" value="Yes"/>
109 </section>
110 <output name="prime_output">
111 <assert_contents>
112 <has_text text='Log likelihood when there is no effect of Iso-electric Point'/>
113 <has_text text='substitutions'/>
114 </assert_contents>
115 </output>
116 <output name="prime_md_report">
117 <assert_contents>
118 <has_text text="* Iso-electric Point"/>
119 </assert_contents>
120 </output>
121 </test>
122 <test expect_num_outputs="2">
123 <param name="input_file" value="prime-in1.fa"/>
124 <param name="input_nhx" value="prime-in1.nhx"/>
125 <conditional name="branch_cond">
126 <param name="branch_sel" value="Internal"/>
127 </conditional>
128 <conditional name="prop_source">
129 <param name="prop_source_type" value="builtin"/>
130 <param name="prop_set" value="Atchley"/>
131 </conditional>
132 <param name="p_value" value="0.1"/>
133 <section name="advanced_options">
134 <param name="impute_states" value="false"/>
135 <param name="save_intermediate" value="false"/>
136 <param name="kill_zero_lengths" value="Yes"/>
137 </section>
138 <output name="prime_output">
139 <assert_contents>
140 <has_text text='"tested"'/>
141 <has_text text='p-value for non-zero effect of Factor III volume'/>
142 </assert_contents>
143 </output>
144 <output name="prime_md_report">
145 <assert_contents>
146 <has_text text="Selected 7 branches"/>
147 </assert_contents>
148 </output>
149 </test>
150 <test expect_num_outputs="2">
151 <param name="input_file" value="prime-in1.fa"/>
152 <param name="input_nhx" value="prime-in1.nhx"/>
153 <conditional name="branch_cond">
154 <param name="branch_sel" value="All"/>
155 </conditional>
156 <conditional name="prop_source">
157 <param name="prop_source_type" value="builtin"/>
158 <param name="prop_set" value="Random-2"/>
159 </conditional>
160 <param name="p_value" value="0.1"/>
161 <section name="advanced_options">
162 <param name="impute_states" value="true"/>
163 <param name="save_intermediate" value="false"/>
164 <param name="kill_zero_lengths" value="Yes"/>
165 </section>
166 <output name="prime_output">
167 <assert_contents>
168 <has_text text='"Imputed States"'/>
169 <has_text text='p-value for non-zero effect of Random Factor 2'/>
170 </assert_contents>
171 </output>
172 <output name="prime_md_report">
173 <assert_contents>
174 <has_text text="impute-states => Yes"/>
175 </assert_contents>
176 </output>
45 </test> 177 </test>
46 </tests> 178 </tests>
47 <help><![CDATA[ 179 <help><![CDATA[
48 PRIME: Property Informed Model of Evolution 180 **What question does this method answer?**
49 =========================================== 181
50 182 Does evolution at specific sites in a coding alignment preserve or alter a set of pre-defined biochemical properties?
51 What question does this method answer? 183
52 -------------------------------------- 184 **Recommended Applications**
53 185
54 Does evolution at specific sites in a coding alignment preserve or alter some biochemical properties? 186 - Identify biochemical evolutionary constraints or changes with site-level resolution (e.g. site 23 is evolving to conserve residue polarity, but alter its volume).
55 187
56 Recommended Applications 188 Method
57 ------------------------
58
59 Identify biochemical evolutionary constraints or changes with site-level resolution: e.g. site 23 is
60 evolving to conserve residue polarity, but alter its volume.
61
62
63 Brief description
64 -----------------
65
66 Most methods of coding sequence analysis do not take direct account of the fact that
67 the rate at which amino-acids are exchanged is different depending on the amino-acids.
68 While this seems obvious (e.g. radical changes should happen more slowly), there are many technical reasons
69 why the standard assumption of "one rate for all residues" is still used.
70
71
72 Given a set of N amino-acid properties, fit a site-level model where non-synonymous rates
73 depend on how much a non-synonymous substitution changes the properties
74 of the residue, beta (X,Y) = Exp (log_omega - lambda_1 * diff_1 (X,Y )-
75 lambda_2 * diff_2 (X,Y) -...). When lambda_k > 0, changes in property k
76 are disfavored and when lambda_k < 0 -- they are promoted. At each site,
77 N+1 tests are performed (one for each property, and an omnibus test).
78
79 Input
80 -----
81
82 1. A *FASTA* sequence alignment.
83 2. A phylogenetic tree in the *Newick* format
84
85 Note: the names of sequences in the alignment must match the names of the sequences in the tree.
86
87
88 Output
89 ------ 189 ------
90 190
91 A JSON file with analysis results (http://hyphy.org/resources/json-fields.pdf). 191 **Background: Limitations of standard dN/dS models**
92 192
93 A custom visualization module for viewing these results will soon be available at http://vision.hyphy.org/ 193 Standard models for detecting natural selection in coding sequences use the dN/dS ratio (ω) to quantify selection pressure. These models typically assume that all non-synonymous substitutions have the same rate, regardless of the specific amino acid change. This is a simplification, as substitutions between biochemically similar amino acids (e.g., Leucine to Isoleucine) are expected to occur more frequently than substitutions between dissimilar ones (e.g., Arginine to Cysteine).
94 194
95 Further reading 195 **PRIME: A Property-Informed Model**
96 --------------- 196
97 197 PRIME (PRoperty-Informed Models of Evolution) extends the standard dN/dS framework by incorporating the biochemical properties of amino acids directly into the substitution model. Instead of a single rate for all non-synonymous changes, PRIME models these rates as a function of the changes in specific biochemical properties between the original and the new amino acid.
98 http://hyphy.org/methods/selection-methods/#PRIME 198
99 199 **The Intuition**
100 200
101 Tool options 201 The core idea is that the fitness cost or benefit of a mutation is often related to how it alters the biochemical characteristics of the resulting protein. PRIME formalizes this by modeling the non-synonymous substitution rate between amino acid `i` and `j` as a function of a baseline dN/dS ratio (ω) and a set of property-specific parameters (λ).
102 ------------ 202
103 :: 203 For each biochemical property `p` (e.g., volume, polarity), the model includes a parameter `λ_p`. This parameter quantifies the extent to which evolution at a given site favors or disfavors changes in that property.
104 204
105 205 - If **λ_p > 0**, changes in property `p` are penalized. This indicates **conservative selection** with respect to that property. For example, a large positive λ for "volume" means that substitutions that significantly alter the amino acid's volume are selected against.
106 --code Which genetic code to use 206 - If **λ_p < 0**, changes in property `p` are favored. This indicates **radical selection** with respect to that property. For example, a large negative λ for "charge" means that substitutions that change the amino acid's charge are selected for.
107 207
108 --branches Which branches should be tested for selection? 208 **The Test**
109 All [default] : test all branches 209
110 210 For each site in the alignment, PRIME performs a likelihood ratio test (LRT) to determine if this more complex, property-informed model is a significantly better fit to the data than a standard dN/dS model. A significant p-value for a specific property suggests that the evolution at that site has been shaped by selection to conserve or radically alter that biochemical property. An omnibus test is also performed to assess the overall significance of all properties combined.
111 Internal : test only internal branches (suitable for 211
112 intra-host pathogen evolution for example, where terminal branches 212 **Input**
113 may contain polymorphism data) 213
114 214 - **Sequence Alignment:** An in-frame codon alignment in FASTA or NEXUS format.
115 Leaves: test only terminal (leaf) branches 215 - **Phylogenetic Tree:** A phylogenetic tree in Newick format. The names of the sequences in the alignment must match the names of the tips in the tree.
116 216
117 Unlabeled: if the Newick string is labeled using the {} notation, 217 **Tool Options**
118 test only branches without explicit labels 218
119 (see http://hyphy.org/tutorials/phylotree/) 219 - **Genetic code:** The genetic code to use for translation.
120 220 - **Branches to test:** Select which branches of the tree to include in the analysis (All, Internal, or Leaves).
121 --pvalue The significance level used to determine significance 221 - **Source of amino-acid properties:**
122 222 - **Use a built-in property set:** Choose from a list of pre-defined sets of amino-acid properties.
123 --properties Which property set to use 223 - `Atchley`: Five properties derived from a factor analysis of 500 amino-acid properties.
124 Atchley : Use the five properties derived from a factor analysis of 500 amino-acid properties [Table 2 in PNAS (2005) 102(18) 6395-6400 doi: 10.1073/pnas.0408677102] 224 - `LCAP`: Five properties from the LCAP model of Conant and Stadler.
125 LCAP: Use the five properties defined in the Conant and Stadler LCAP model [Mol Biol Evol (2009) 26 (5): 1155-1161. doi: 10.1093/molbev/msp031] 225 - `Random-*`: Sets of 2, 3, 4, or 5 random properties for null hypothesis testing.
126 226 - **Provide a custom property file:** Supply your own set of properties in a JSON file.
127 227 - **P-value threshold:** The significance level for the likelihood ratio test.
228 - **Impute states:** Use the fitted model to infer the most likely character states at each internal node of the tree.
229 - **Save intermediate model fits:** Save the parameter estimates from the initial model fits to a separate JSON file.
230 - **Kill zero-length branches:** Automatically remove internal branches of length zero for computational efficiency.
231
232 **Output**
233
234 - A JSON file with the detailed analysis results, including site-level p-values and parameter estimates.
235 - A Markdown report summarizing the analysis and results.
236
237 **Further reading**
238
239 - http://hyphy.org/methods/selection-methods/#PRIME
240 - http://hyphy.org/resources/json-fields.pdf
128 ]]></help> 241 ]]></help>
129 <expand macro="citations" /> 242 <expand macro="citations" />
130 </tool> 243 </tool>