comparison hyphy_fel.xml @ 36:0f71b8724151 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92
author iuc
date Tue, 07 Oct 2025 20:36:45 +0000
parents a822a4869772
children
comparison
equal deleted inserted replaced
35:a822a4869772 36:0f71b8724151
5 </macros> 5 </macros>
6 <expand macro="bio_tools"/> 6 <expand macro="bio_tools"/>
7 <expand macro="requirements"/> 7 <expand macro="requirements"/>
8 <command detect_errors="exit_code"><![CDATA[ 8 <command detect_errors="exit_code"><![CDATA[
9 @SYMLINK_FILES@ 9 @SYMLINK_FILES@
10 @SHELL_OPTIONS@
10 @HYPHYMPI@ fel 11 @HYPHYMPI@ fel
11 --alignment ./$input_file 12 --alignment ./$input_file
12 @INPUT_TREE@ 13 @INPUT_TREE@
13 --code '$gencodeid' 14 --code '$gencodeid'
15 --multiple-hits $multiple_hits_conditional.multiple_hits
14 @branch_options@ 16 @branch_options@
15 --srv '$include_srv' 17 --srv '$include_srv'
16 --pvalue '$p_value' 18 --pvalue '$p_value'
17 @resample@ 19 @resample@
20 #if $restrict_sites_conditional.restrict_sites_flag == "true":
21 --limit-to-sites '$restrict_sites_conditional.limit_to_sites'
22 --save-lf-for-sites '$restrict_sites_conditional.save_lf_for_sites'
23 #end if
18 --precision $precision 24 --precision $precision
19 $ci 25 $ci
20 --output '$fel_output' 26 --output '$fel_output'
27 #if $multiple_hits_conditional.multiple_hits != "None":
28 --site-multihit $multiple_hits_conditional.site_multihit
29 #end if
30
31 --kill-zero-lengths $kill_zero_lengths
32 #if $full_model:
33 --full-model $full_model
34 #end if
35 > fel_stdout.md
21 @ERRORS@ 36 @ERRORS@
22 ]]></command> 37 ]]></command>
23 <inputs> 38 <inputs>
24 <expand macro="inputs"/> 39 <expand macro="inputs"/>
25 <expand macro="gencode"/> 40 <expand macro="gencode"/>
26 <expand macro="branches"/> 41 <expand macro="branches"/>
27 <param argument="--pvalue" name="p_value" type="float" value=".1" min="0" max="1" label="P-value"/> 42 <param argument="--pvalue" name="p_value" type="float" value=".1" min="0" max="1" label="P-value"/>
28 <param name="include_srv" type="select" label="Include synonymous rate variation" help = "Allow synonymous rates to vary from site to site"> 43 <section name="advanced_options" title="Advanced Options" expanded="false">
29 <option value="Yes">Yes (recommended)</option> 44 <param name="include_srv" type="select" label="Include synonymous rate variation" help = "Allow synonymous rates to vary from site to site">
30 <option value="No">No</option> 45 <option value="Yes">Yes (recommended)</option>
31 </param> 46 <option value="No">No</option>
32 <param argument="--ci" type="boolean" truevalue="--ci Yes" falsevalue="" label="Compute profile likelihood confidence intervals for each variable site" /> 47 </param>
33 <expand macro="resample"/> 48 <conditional name="multiple_hits_conditional">
34 <param argument="--precision" type="select" label="Optimization precision for preliminary fits"> 49 <param argument="--multiple-hits" type="select" label="Include support for multiple nucleotide substitutions">
35 <option value="standard">Standard</option> 50 <option value="Double">Include branch-specific rates for double nucleotide substitutions</option>
36 <option value="reduced">Reduced for faster fitting</option> 51 <option value="Double+Triple">Include branch-specific rates for double and triple nucleotide substitutions</option>
37 </param> 52 <option value="None" selected="true">Use standard models which permit only single nucleotide changes to occur instantly</option>
53 </param>
54 <when value="Double">
55 <param argument="--site-multihit" type="select" label="Estimate multiple hit rates for each site">
56 <option value="Estimate" selected="true">Estimate</option>
57 <option value="No">No</option>
58 </param>
59 </when>
60 <when value="Double+Triple">
61 <param argument="--site-multihit" type="select" label="Estimate multiple hit rates for each site">
62 <option value="Estimate" selected="true">Estimate</option>
63 <option value="No">No</option>
64 </param>
65 </when>
66 <when value="None">
67 </when>
68 </conditional>
69 <param argument="--ci" type="boolean" truevalue="--ci Yes" falsevalue="" label="Compute profile likelihood confidence intervals for each variable site" />
70 <expand macro="resample"/>
71 <conditional name="restrict_sites_conditional">
72 <param name="restrict_sites_flag" type="select" label="Restrict FEL analysis to a subset of sites" help="If Yes, allows specifying a subset of sites for analysis.">
73 <option value="true">Yes</option>
74 <option value="false" selected="true">No</option>
75 </param>
76 <when value="true">
77 <param argument="--limit-to-sites" type="text" optional="true" label="Only analyze sites whose 1-based indices match the following list (null to skip)" value="null" help="Comma-separated list of site indices."/>
78 <param argument="--save-lf-for-sites" type="text" optional="true" label="For sites whose 1-based indices match the following list, write out likelihood function snapshots (null to skip)" value="null" help="Comma-separated list of site indices."/>
79 </when>
80 <when value="false">
81 </when>
82 </conditional>
83 <param argument="--precision" type="select" label="Optimization precision for preliminary fits">
84 <option value="standard">Standard</option>
85 <option value="reduced">Reduced for faster fitting</option>
86 </param>
87
88 <expand macro="kill_zero_lengths_param"/>
89 <param argument="--full-model" type="boolean" truevalue="Yes" falsevalue="No" checked="true" label="Perform branch length re-optimization under the full codon model" help="If true, re-optimizes branch lengths under the full codon model."/>
90 </section>
91
38 </inputs> 92 </inputs>
39 <outputs> 93 <outputs>
94 <data name="fel_md_report" format="markdown" from_work_dir="fel_stdout.md" label="FEL Report (Markdown) for ${tool.name} on ${on_string}" />
40 <data name="fel_output" format="hyphy_results.json" /> 95 <data name="fel_output" format="hyphy_results.json" />
96
41 </outputs> 97 </outputs>
42 <tests> 98 <tests>
43 <test> 99 <test expect_num_outputs="2">
44 <param name="input_file" ftype="fasta" value="absrel-in1.fa"/> 100 <param name="input_file" ftype="fasta" value="absrel-in1.fa"/>
45 <param name="input_nhx" ftype="nhx" value="absrel-in1.nhx"/> 101 <param name="input_nhx" ftype="nhx" value="absrel-in1.nhx"/>
46 <output name="fel_output" file="fel-out1.json" compare="sim_size"/> 102 <output name="fel_output">
103 <assert_contents>
104 <has_text text="Likelihood ratio test statistic for beta = alpha, versus beta " />
105 </assert_contents>
106 </output>
107 <output name="fel_md_report">
108 <assert_contents>
109 <has_text text="sites under pervasive positive diversifying and" />
110 </assert_contents>
111 </output>
47 </test> 112 </test>
48 <test> 113 <test expect_num_outputs="2">
49 <param name="input_file" ftype="fasta" value="absrel-in1.fa"/> 114 <param name="input_file" ftype="fasta" value="absrel-in1.fa"/>
50 <param name="input_nhx" ftype="nhx" value="absrel-in1.nhx"/> 115 <param name="input_nhx" ftype="nhx" value="absrel-in1.nhx"/>
51 <param name="ci" value="true" /> 116 <param name="advanced_options|ci" value="true" />
52 <param name="precision" value="reduced" /> 117 <param name="advanced_options|precision" value="reduced" />
53 <output name="fel_output" file="fel-out1.json" compare="sim_size"/> 118 <param name="p_value" value="0.05" />
119 <output name="fel_output">
120 <assert_contents>
121 <has_text text="Likelihood ratio test statistic for beta = alpha, versus beta " />
122 <has_text text="95% profile likelihood CI upper bound for dN/dS (if available)" />
123 </assert_contents>
124 </output>
125 <output name="fel_md_report">
126 <assert_contents>
127 <has_text text="sites under pervasive positive diversifying and" />
128 <has_text text=">precision => reduced" />
129 <has_text text="### For partition 1 these sites are significant at p &lt;=0.05" />
130 </assert_contents>
131 </output>
132 </test>
133 <test expect_num_outputs="2">
134 <param name="input_file" ftype="fasta" value="absrel-in1.fa"/>
135 <param name="input_nhx" ftype="nhx" value="absrel-in1.nhx"/>
136 <section name="advanced_options">
137 <conditional name="restrict_sites_conditional">
138 <param name="restrict_sites_flag" value="true" />
139 <param name="limit_to_sites" value="1,2,3" />
140 </conditional>
141 </section>
142 <output name="fel_output">
143 <assert_contents>
144 <has_text text="&quot;site-filter&quot;:&quot;1,2,3&quot;" />
145 <has_text text="Likelihood ratio test statistic for beta = alpha, versus beta " />
146 </assert_contents>
147 </output>
148 <output name="fel_md_report">
149 <assert_contents>
150 <has_text text=">limit-to-sites => 1,2,3" />
151 <has_text text="sites under pervasive positive diversifying and" />
152 </assert_contents>
153 </output>
54 </test> 154 </test>
55 </tests> 155 </tests>
56 <help><![CDATA[ 156 <help><![CDATA[
57 FEL : Fixed effects likelihood 157 FEL : Fixed effects likelihood
58 ============================== 158 ==============================
59 159
60 What question does this method answer? 160 What question does this method answer?
61 -------------------------------------- 161 --------------------------------------
62 162
63 Which site(s) in a gene are subject to pervasive, i.e. consistently across the entire phylogeny, diversifying selection? 163 FEL (Fixed Effects Likelihood) is a statistical method used to identify individual sites in a gene that are subject to pervasive diversifying selection. It addresses the question: Which specific sites in a gene show evidence of positive selection that has been consistently maintained across the entire evolutionary phylogeny of the analyzed sequences?
64 164
65 Recommended Applications 165 Recommended Applications
66 ------------------------ 166 ------------------------
67 167
68 The phenomenon of pervasive selection is generally most prevalent in pathogen evolution and any biological system influenced by evolutionary arms race dynamics 168 The phenomenon of pervasive selection is generally most prevalent in pathogen evolution and any biological system influenced by evolutionary arms race dynamics
72 FEL is our recommended method for analyzing small-to-medium size datasets when one wishes only to study pervasive selection at individual sites. 172 FEL is our recommended method for analyzing small-to-medium size datasets when one wishes only to study pervasive selection at individual sites.
73 173
74 Brief description 174 Brief description
75 ----------------- 175 -----------------
76 176
77 FEL (Fixed Effects Likelihood) estimates site-wise synonymous (alpha) 177 FEL (Fixed Effects Likelihood) is a powerful method for detecting pervasive positive or negative selection at individual sites in a coding sequence. It operates by estimating site-wise synonymous (alpha, dS) and non-synonymous (beta, dN) substitution rates using a maximum likelihood approach. For each site, FEL then performs a likelihood ratio test (LRT) to compare a null model (where dN = dS) against an alternative model (where dN != dS). A significant p-value from this test indicates that the site is under selection. The method aggregates information across all branches of the phylogenetic tree, making it suitable for identifying sites under pervasive diversifying selection (dN > dS) or pervasive purifying selection (dN < dS). A subset of branches can also be selected for testing, in which case FEL infers an additional nuisance parameter: the non-synonymous rate on the branches not selected for testing.
78 and non-synonymous rates (beta), and uses a likelihood ratio test to 178
79 determine if beta != alpha at a site. The estimates aggregate 179 **Intuition:** Imagine you're looking at a gene's evolution across different species. Some parts of the gene might change a lot (diversifying selection), while others stay the same (purifying selection). FEL helps pinpoint the exact "letters" (sites) in the gene that are consistently under pressure to change or stay the same throughout its evolutionary history. It does this by comparing how often synonymous (silent) changes happen versus non-synonymous (amino acid altering) changes at each site. If non-synonymous changes happen significantly more often, it suggests positive selection.
80 information over all branches, so the signal is derived from pervasive
81 diversification or conservation. A subset of branches can be selected
82 for testing as well, in which case an additional (nuisance) parameter
83 will be inferred -- the non-synonymous rate on branches NOT selected for
84 testing.
85 180
86 181
87 Input 182 Input
88 ----- 183 -----
89 184
105 http://hyphy.org/methods/selection-methods/#FEL 200 http://hyphy.org/methods/selection-methods/#FEL
106 201
107 202
108 Tool options 203 Tool options
109 ------------ 204 ------------
110
111 :: 205 ::
112 206
113 --code Which genetic code to use 207 --alignment [required] An in-frame codon alignment in one of the formats supported by HyPhy.
208 --tree [conditionally required] A phylogenetic tree (optionally annotated with {}).
209
210 --code Which genetic code to use (see tool form for available options).
211
212 --multiple-hits Include support for multiple nucleotide substitutions.
213 Double : Include branch-specific rates for double nucleotide substitutions.
214 Double+Triple : Include branch-specific rates for double and triple nucleotide substitutions.
215 None [default] : Use standard models which permit only single nucleotide changes to occur instantly.
216
217 --site-multihit Estimate multiple hit rates for each site. This option is available only if 'Include support for multiple nucleotide substitutions' is set to 'Double' or 'Double+Triple'.
218 Estimate [default] : Estimate multiple hit rates.
219 No : Do not estimate multiple hit rates.
114 220
115 --branches Which branches should be tested for selection? 221 --branches Which branches should be tested for selection?
116 All [default] : test all branches 222 All [default] : test all branches.
117 223 Internal : test only internal branches (suitable for intra-host pathogen evolution for example, where terminal branches may contain polymorphism data).
118 Internal : test only internal branches (suitable for 224 Leaves: test only terminal (leaf) branches.
119 intra-host pathogen evolution for example, where terminal branches 225 Unlabeled: if the Newick string is labeled using the {} notation, test only branches without explicit labels (see http://hyphy.org/tutorials/phylotree/).
120 may contain polymorphism data) 226 Custom : Enter a branch label.
121 227
122 Leaves: test only terminal (leaf) branches 228 --pvalue The significance level used to determine significance (default: 0.1, range: 0 to 1).
123 229
124 Unlabeled: if the Newick string is labeled using the {} notation, 230 --srv Include site-to-site synonymous rate variation?
125 test only branches without explicit labels 231 Yes [default] : Allow synonymous rates to vary from site to site.
126 (see http://hyphy.org/tutorials/phylotree/) 232 No : Do not allow synonymous rates to vary.
127 233
128 --pvalue The significance level used to determine significance 234 --ci Compute profile likelihood confidence intervals for each variable site (default: No).
129 235
130 --srv Include site-to-site synonymous rate variation? 236 Advanced Attributes
131 Yes [default] or No 237 -------------------
132 238 ::
133 239
134 240 --resample Perform parametric bootstrap resampling to derive site-level null LRT distributions.
135 241 Warning: This will result in a significantly slower analysis. A value of 0 means no resampling is performed. This parameter specifies the maximum number of replicates per site (default: 0, range: 0 to 1000).
136 ]]> 242
137 243 --restrict-sites Restrict FEL analysis to a subset of sites. If Yes, allows specifying a subset of sites for analysis.
138 </help> 244 Yes : Restrict analysis to a subset of sites.
245 No [default] : Do not restrict analysis to a subset of sites.
246
247 --limit-to-sites Only analyze sites whose 1-based indices match the following list (null to skip). This option is available only if 'Restrict FEL analysis to a subset of sites' is set to 'Yes'. Comma-separated list of site indices.
248 --save-lf-for-sites For sites whose 1-based indices match the following list, write out likelihood function snapshots (null to skip). This option is available only if 'Restrict FEL analysis to a subset of sites' is set to 'Yes'. Comma-separated list of site indices.
249
250 --precision Optimization precision settings for preliminary fits.
251 Standard [default]
252 Reduced for faster fitting
253
254 --kill-zero-lengths Automatically delete internal zero-length branches for computational efficiency.
255 Yes [default] : Automatically delete internal zero-length branches for computational efficiency (will not affect results otherwise).
256 Constrain : Keep zero-length branches, but constrain their values to 0.
257 No : Keep all branches.
258
259 --full-model Perform branch length re-optimization under the full codon model (default: Yes). If true, re-optimizes branch lengths under the full codon model.
260
261 ]]>
262 </help>
139 263
140 <expand macro="citations"> 264 <expand macro="citations">
141 <citation type="doi">10.1093/molbev/msi105</citation> 265 <citation type="doi">10.1093/molbev/msi105</citation>
142 </expand> 266 </expand>
143 </tool> 267 </tool>