comparison hyphy_cfel.xml @ 13:fea3f6a79104 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92
author iuc
date Tue, 07 Oct 2025 20:41:29 +0000
parents 8aec341d10ec
children
comparison
equal deleted inserted replaced
12:8aec341d10ec 13:fea3f6a79104
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"/> 6 <expand macro="requirements"/>
7 <command detect_errors="exit_code"><![CDATA[ 7 <command detect_errors="exit_code"><![CDATA[
8 @SYMLINK_FILES@ 8 @SYMLINK_FILES@
9 hyphy contrast-fel 9 @HYPHYMPI@ contrast-fel
10 --alignment $input_file
11 @INPUT_TREE@
10 --code '$gencodeid' 12 --code '$gencodeid'
11 #for $i, $branch in enumerate($branch_repeat): 13 #for $i, $branch in enumerate($branch_repeat):
12 --branch-set '$branch.branch_label' 14 --branch-set '$branch.branch_label'
13 #end for 15 #end for
14 --srv '$srv' 16 --srv '$advanced_settings.srv'
15 --permutations '$permutations' 17 --permutations '$advanced_settings.permutations'
16 --pvalue '$pvalue' 18 --pvalue '$pvalue'
17 --qvalue '$qvalue' 19 --qvalue '$qvalue'
20 #if $advanced_settings.limit_to_sites:
21 --limit-to-sites '$advanced_settings.limit_to_sites'
22 #end if
23 #if $advanced_settings.save_lf_for_sites:
24 --save-lf-for-sites '$advanced_settings.save_lf_for_sites'
25 #end if
26 #if $advanced_settings.intermediate_fits:
27 --intermediate-fits intermediate_fits.json
28 #end if
29 --kill-zero-lengths $advanced_settings.kill_zero_lengths
18 --output '$cfel_output' 30 --output '$cfel_output'
19 $input_file 31 > cfel_stdout.md
20 @INPUT_TREE@
21 @ERRORS@ 32 @ERRORS@
22 ]]></command> 33 ]]></command>
23 <inputs> 34 <inputs>
24 <expand macro="inputs"/> 35 <expand macro="inputs"/>
25 <expand macro="gencode"/> 36 <expand macro="gencode"/>
26 <repeat name="branch_repeat" title="Branch set" min="1"> 37 <repeat name="branch_repeat" title="Branch set" min="1">
27 <param argument="--branch-set" name="branch_label" type="text" value="Test" optional="false" label="Label for set"> 38 <param argument="--branch-set" name="branch_label" type="text" value="Test" optional="false" label="Label for set" help="The label for the branches to be tested. &quot;Terminal branches&quot;, &quot;Internal branches&quot;, &quot;Random set of branches&quot;, and &quot;Unlabeled branches&quot; are choices that exist in addition to the label sets in the Newick tree.">
28 <sanitizer invalid_char=""> 39 <sanitizer invalid_char="">
29 <valid initial="default" /> 40 <valid initial="default" />
30 </sanitizer> 41 </sanitizer>
31 </param> 42 </param>
32 </repeat> 43 </repeat>
33 <param argument="--permutations" type="boolean" truevalue="Yes" falsevalue="No" label="Perform permutation significance tests"/> 44
34 <param argument="--pvalue" type="float" value=".05" min="0" max="1" label="Significance value for site-tests"/> 45 <param argument="--pvalue" type="float" value=".05" min="0" max="1" label="Significance value for site-tests" help="Significance value for site-tests"/>
35 <param argument="--qvalue" type="float" value=".2" min="0" max="1" label="Significance value for FDR reporting"/> 46 <param argument="--qvalue" type="float" value=".2" min="0" max="1" label="Significance value for FDR reporting" help="Significance value for FDR reporting"/>
36 <param argument="--srv" type="select" label="Include synonymous rate variation" help = "Allow synonymous rates to vary from site to site"> 47
37 <option value="Yes">Yes (recommended)</option> 48 <section name="advanced_settings" title="Advanced Options" expanded="false">
38 <option value="No">No</option> 49 <param argument="--limit-to-sites" type="text" optional="true" label="Limit analysis to specific sites" help="Only analyze sites whose 1-based indices match the following list (null to skip)"/>
39 </param> 50 <param argument="--save-lf-for-sites" type="text" optional="true" label="Save likelihood function snapshots for sites" help="For sites whose 1-based indices match the following list, write out likelihood function snapshots (null to skip)"/>
51 <param name="intermediate_fits" type="boolean" truevalue="Yes" falsevalue="No" label="Save intermediate fits" help="Use/save parameter estimates from 'initial-guess' model fits to a JSON file"/>
52 <param argument="--srv" type="select" label="Include synonymous rate variation" help = "Allow synonymous rates to vary from site to site">
53 <option value="Yes">Yes (recommended)</option>
54 <option value="No">No</option>
55 </param>
56 <param argument="--permutations" type="boolean" truevalue="Yes" falsevalue="No" label="Perform permutation significance tests" help="Perform permutation significance tests"/>
57 <expand macro="kill_zero_lengths_param"/>
58 </section>
40 </inputs> 59 </inputs>
41 <outputs> 60 <outputs>
42 <data name="cfel_output" format="hyphy_results.json" /> 61 <data name="cfel_output" format="hyphy_results.json" />
62 <data name="cfel_md_report" format="markdown" from_work_dir="cfel_stdout.md" label="CFEL Report (Markdown) for ${tool.name} on ${on_string}" />
43 </outputs> 63 </outputs>
44 <tests> 64 <tests>
45 <test> 65 <test expect_num_outputs="2">
46 <param name="input_file" ftype="fasta" value="absrel-in1.fa"/> 66 <param name="input_file" ftype="fasta" value="absrel-in1.fa"/>
47 <param name="input_nhx" ftype="nhx" value="absrel-in1.nhx"/> 67 <param name="input_nhx" ftype="nhx" value="absrel-in1.nhx"/>
48 <repeat name="branch_repeat"> 68 <repeat name="branch_repeat">
49 <param name="branch_label" value="Internal branches" /> 69 <param name="branch_label" value="Internal branches" />
50 </repeat> 70 </repeat>
51 <repeat name="branch_repeat"> 71 <repeat name="branch_repeat">
52 <param name="branch_label" value="Terminal branches" /> 72 <param name="branch_label" value="Terminal branches" />
53 </repeat> 73 </repeat>
54 <output name="cfel_output" file="cfel-out1.json" compare="sim_size"/> 74 <output name="cfel_output">
75 <assert_contents>
76 <has_text text="beta (internal)"/>
77 <has_text text="branch attributes"/>
78 </assert_contents>
79 </output>
80 <output name="cfel_md_report">
81 <assert_contents>
82 <has_text text="Selected 4 branches in group _leaf_ : `Pig, Cow, Baboon, Rat`"/>
83 <has_text text="Permutation p-value"/>
84 </assert_contents>
85 </output>
55 </test> 86 </test>
56 </tests> 87 </tests>
57 <help><![CDATA[ 88 <help><![CDATA[
58 Contrast-FEL : A Test for Differences in Selective Pressures at Individual Sites among Clades and Sets of Branches 89 Contrast-FEL : A Test for Differences in Selective Pressures at Individual Sites among Clades and Sets of Branches
59 ================================================================================================================== 90 ==================================================================================================================
60 91
61 A number of evolutionary hypotheses can be tested by comparing selective pressures among sets of branches in a phylogenetic tree. When the question of interest is to identify specific sites within genes that may be evolving differently, a common approach is to perform separate analyses on subsets of sequences and compare parameter estimates in a post hoc fashion. This approach is statistically suboptimal and not always applicable. Here, we develop a simple extension of a popular fixed effects likelihood method in the context of codon-based evolutionary phylogenetic maximum likelihood testing, Contrast-FEL. It is suitable for identifying individual alignment sites where any among the K≥2 sets of branches in a phylogenetic tree have detectably different ω ratios, indicative of different selective regimes. Using extensive simulations, we show that Contrast-FEL delivers good power, exceeding 90% for sufficiently large differences, while maintaining tight control over false positive rates, when the model is correctly specified. We conclude by applying Contrast-FEL to data from five previously published studies spanning a diverse range of organisms and focusing on different evolutionary questions. 92 Brief description
93 -----------------
94
95 Contrast-FEL (Fixed Effects Likelihood) is a statistical method designed to identify individual sites within genes that experience different selective pressures among various clades or sets of branches in a phylogenetic tree. It extends the traditional Fixed Effects Likelihood (FEL) method to detect differences in ω ratios (the ratio of nonsynonymous to synonymous substitution rates) using a likelihood-ratio test.
96
97 The intuition behind Contrast-FEL is that if different evolutionary pressures are acting on different parts of a phylogenetic tree, then the ω ratios at specific sites might vary significantly between these groups of branches. For example, a site might be under strong purifying selection in one clade but under positive selection in another. Contrast-FEL allows for the direct comparison of these selective regimes at a site-by-site level.
98
99 This method is particularly useful for testing evolutionary hypotheses that involve comparing selective pressures among predefined sets of branches. It provides site-level resolution for comparing selective pressures, which is often lacking in other approaches. Simulations have shown that Contrast-FEL offers good power and maintains control over false positive rates when the model is correctly specified.
100
101 Methodology and Intuition
102 -------------------------
103
104 Contrast-FEL operates by comparing evolutionary rates at individual sites across different predefined branch sets in a phylogenetic tree. The core idea is to detect shifts in selective pressure (quantified by the ω ratio, dN/dS) that are specific to certain lineages or clades.
105
106 1. **Site-wise Likelihood Calculation:** For each site in the alignment, Contrast-FEL estimates the synonymous (α) and nonsynonymous (β) substitution rates. Crucially, it estimates a separate nonsynonymous rate (β) for each specified branch set, while the synonymous rate (α) is shared across all branches. This allows for direct comparison of selective pressures.
107
108 2. **Hypothesis Testing:** The method then performs a likelihood-ratio test (LRT) for each site. The null hypothesis is that the ω ratios are the same across all specified branch sets for that site. The alternative hypothesis is that at least one branch set has a significantly different ω ratio.
109
110 3. **Permutation Testing (Optional):** To account for potential biases and improve the robustness of significance calls, Contrast-FEL can perform permutation tests. In this approach, branch labels are permuted across the tree, and the analysis is re-run multiple times. This generates an empirical null distribution of LRT statistics, which can then be used to calculate more accurate p-values.
111
112 4. **False Discovery Rate (FDR) Control:** To address the multiple testing problem inherent in site-wise analyses, Contrast-FEL applies a False Discovery Rate (FDR) correction (e.g., Benjamini-Hochberg procedure) to the p-values. This helps to control the proportion of false positives among the significant sites.
113
114 5. **Interpretation of ω Ratios:**
115
116 * **ω < 1 (Purifying Selection):** Nonsynonymous mutations are deleterious and are removed by selection.
117 * **ω = 1 (Neutral Evolution):** Nonsynonymous mutations are neither advantageous nor deleterious.
118 * **ω > 1 (Positive Selection):** Nonsynonymous mutations are advantageous and are favored by selection.
119
120 By comparing the site-specific ω ratios among different branch sets, Contrast-FEL can pinpoint sites that have undergone adaptive evolution (positive selection) or strong functional constraint (purifying selection) in specific lineages, providing insights into the evolutionary history and functional divergence of genes.
121
122 Input
123 -----
124
125 1. A *FASTA* sequence alignment.
126 2. A phylogenetic tree in the *Newick* format.
127
128 Note: the names of sequences in the alignment must match the names of the sequences in the tree.
129
130 Output
131 ------
132
133 A JSON file with analysis results.
134
135 A Markdown file with a summary of the analysis.
136
137 Tool options
138 ------------
139 ::
140
141 --code Which genetic code to use.
142
143 --branch-set The set of branches to use for testing.
144
145 --srv Include synonymous rate variation in the model.
146 Yes (recommended): Allow synonymous rates to vary from site to site.
147 No: Do not allow synonymous rates to vary from site to site.
148
149 --permutations Perform permutation significance tests.
150
151 --pvalue Significance value for site-tests.
152
153 --qvalue Significance value for FDR reporting.
154
155 Advanced parameters
156 ...................
157
158 --limit-to-sites Only analyze sites whose 1-based indices match the following list (null to skip).
159
160 --save-lf-for-sites For sites whose 1-based indices match the following list, write out likelihood function snapshots (null to skip).
161
162 --intermediate-fits Use/save parameter estimates from 'initial-guess' model fits to a JSON file.
163
164 --kill-zero-lengths Automatically delete internal zero-length branches for computational efficiency.
62 ]]> 165 ]]>
63 166
64 </help> 167 </help>
65 168
66 <expand macro="citations"> 169 <expand macro="citations">