comparison hyphy_fubar.xml @ 36:da919379e8e4 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92
author iuc
date Tue, 07 Oct 2025 20:40:57 +0000
parents d44c0b7a6cb8
children
comparison
equal deleted inserted replaced
35:d44c0b7a6cb8 36:da919379e8e4
11 hyphy fubar 11 hyphy fubar
12 --alignment ./$input_file 12 --alignment ./$input_file
13 @INPUT_TREE@ 13 @INPUT_TREE@
14 --code '$gencodeid' 14 --code '$gencodeid'
15 --method '$posteriorEstimationMethod.method' 15 --method '$posteriorEstimationMethod.method'
16 --grid '$grid_points'
17 @posteriorEstimationMethod_cmd@ 16 @posteriorEstimationMethod_cmd@
18 --concentration_parameter '$concentration' 17 --grid '$advanced_options.grid_points'
18 --concentration_parameter '$advanced_options.concentration'
19 --non-zero $advanced_options.non_zero
20 --kill-zero-lengths $advanced_options.kill_zero_lengths
21 > fubar_stdout.md
19 @ERRORS@ 22 @ERRORS@
20 ]]></command> 23 ]]></command>
21 <inputs> 24 <inputs>
22 <expand macro="inputs"/> 25 <expand macro="inputs"/>
23 <expand macro="gencode"/> 26 <expand macro="gencode"/>
24 <param argument="--grid" name="grid_points" type="integer" value="20" min="5" max="50" label="Grid points" />
25 <expand macro="conditional_posteriorEstimationMethod" /> 27 <expand macro="conditional_posteriorEstimationMethod" />
26 <param argument="--concentration_parameter" name="concentration" type="float" value="0.5" min="0.001" max="1" label="Concentration parameter of the Dirichlet prior" /> 28 <section name="advanced_options" title="Advanced Options" expanded="false">
29 <param argument="--grid" name="grid_points" type="integer" value="20" min="5" max="50" label="Grid points" help="The number of grid points used to approximate the posterior distribution of dN and dS." />
30 <param argument="--concentration_parameter" name="concentration" type="float" value="0.5" min="0.001" max="1" label="Concentration parameter of the Dirichlet prior" help="The concentration parameter of the Dirichlet prior on the grid weights." />
31 <param argument="--non-zero" type="boolean" truevalue="Yes" falsevalue="No" label="Enforce non-zero synonymous rates" help="Enforce non-zero synonymous rates on the grid. This is useful for calculating dN/dS ratios, as it prevents division by zero."/>
32 <expand macro="kill_zero_lengths_param"/>
33 </section>
27 34
28 </inputs> 35 </inputs>
29 <outputs> 36 <outputs>
30 <data name="fubar_output" format="hyphy_results.json" /> 37 <data name="fubar_output" format="hyphy_results.json" />
38 <data name="fubar_md_report" format="markdown" from_work_dir="fubar_stdout.md" label="FUBAR Report (Markdown) for ${tool.name} on ${on_string}" />
31 </outputs> 39 </outputs>
32 <tests> 40 <tests>
33 <test> 41 <test expect_num_outputs="2">
34 <param name="input_file" ftype="fasta.gz" value="fubar-in1.fa.gz"/> 42 <param name="input_file" ftype="fasta.gz" value="fubar-in1.fa.gz"/>
35 <param name="input_nhx" ftype="nhx" value="fubar-in1.nhx"/> 43 <param name="input_nhx" ftype="nhx" value="fubar-in1.nhx"/>
36 <conditional name="posteriorEstimationMethod"> 44 <conditional name="posteriorEstimationMethod">
37 <param name="method" value="Variational-Bayes"/> 45 <param name="method" value="Variational-Bayes"/>
38 </conditional> 46 </conditional>
39 <output name="fubar_output" file="fubar-out1.json" compare="sim_size"/> 47 <output name="fubar_output">
48 <assert_contents>
49 <has_text text="Empiricial Bayes Factor for positive selection at a site"/>
50 </assert_contents>
51 </output>
52 <output name="fubar_md_report">
53 <assert_contents>
54 <has_text text="Running an iterative zeroth order variational Bayes procedure to estimate the posterior mean of rate weights"/>
55 <has_text text="### Tabulating site-level results"/>
56 </assert_contents>
57 </output>
40 </test> 58 </test>
41 </tests> 59 </tests>
42 <help><![CDATA[ 60 <help><![CDATA[
43 61
44 FUBAR : Fast, Unconstrained Bayesian AppRoximation 62 FUBAR : Fast, Unconstrained Bayesian AppRoximation
59 FUBAR is our recommended method for detecting pervasive selection at individual sites on large (> 500 sequences) datasets for which other methods have prohibitive runtimes, unless you have access to a computer cluster. 77 FUBAR is our recommended method for detecting pervasive selection at individual sites on large (> 500 sequences) datasets for which other methods have prohibitive runtimes, unless you have access to a computer cluster.
60 78
61 Brief description 79 Brief description
62 ----------------- 80 -----------------
63 81
64 Perform a Fast Unbiased AppRoximate Bayesian (FUBAR) analysis of a 82 FUBAR (Fast, Unconstrained Bayesian AppRoximation) is a Bayesian method for detecting site-specific positive and negative selection. It is designed to be fast and efficient, making it suitable for large datasets.
65 coding sequence alignment to determine whether some sites have been 83
66 subject to pervasive purifying or diversifying selection. There are three methods 84 The core idea behind FUBAR is to model the non-synonymous (dN) and synonymous (dS) substitution rates at each site in a codon alignment. The ratio of these rates (dN/dS, or omega) is a measure of the selective pressure acting on a site. An omega value greater than 1 indicates positive (diversifying) selection, a value less than 1 indicates negative (purifying) selection, and a value of 1 indicates neutral evolution.
67 for estimating the posterior distribution of 85
68 grid weights: collapsed Gibbs MCMC (faster), 0-th order Variation 86 FUBAR uses a Bayesian approach to infer the posterior distribution of dN and dS at each site. It does this by discretizing the dN and dS rates into a grid of points and then using a Bayesian graphical model to infer the posterior probability of each grid point for each site. This approach is much faster than traditional MCMC-based methods, which require long run times to converge.
69 Bayes approximation (fastest), full Metropolis-Hastings (slowest). 87
88 FUBAR offers three different methods for estimating the posterior distribution:
89
90 * **Variational-Bayes:** A fast approximation method that is the recommended default.
91 * **Collapsed-Gibbs:** An MCMC method of intermediate speed (faster than Metropolis-Hastings).
92 * **Metropolis-Hastings:** The original, slowest MCMC method.
70 93
71 Input 94 Input
72 ----- 95 -----
73 96
74 1. A *FASTA* sequence alignment. 97 1. A *FASTA* sequence alignment.
95 :: 118 ::
96 119
97 120
98 --code Which genetic code to use 121 --code Which genetic code to use
99 122
100 --grid The number of grid points 123 --grid The number of grid points used to approximate the posterior distribution of dN and dS. A larger grid will provide a more accurate approximation but will also be slower. The default value of 20 is a good compromise between speed and accuracy.
101 Smaller : faster
102 Larger : more precise posterior estimation but slower
103 default value: 20
104 124
105 --method Inference method to use 125 --method The inference method to use for estimating the posterior distribution.
106 Variational-Bayes : 0-th order Variational Bayes approximation; fastest [default] 126 Variational-Bayes : 0-th order Variational Bayes approximation; fastest [default]
107 Metropolis-Hastings : Full Metropolis-Hastings MCMC algorithm; original method [slowest] 127 Metropolis-Hastings : Full Metropolis-Hastings MCMC algorithm; original method [slowest]
108 Collapsed-Gibbs : Collapsed Gibbs sampler [intermediate speed] 128 Collapsed-Gibbs : Collapsed Gibbs sampler [intermediate speed]
109 129
110 130
111 --chains How many MCMC chains to run (does not apply to Variational-Bayes) 131 --chains The number of MCMC chains to run. This is only applicable to the Metropolis-Hastings and Collapsed-Gibbs methods. A larger number of chains will provide a better exploration of the posterior distribution but will also be slower.
112 default value: 5 132 default value: 5
113 133
114 --chain-length MCMC chain length (does not apply to Variational-Bayes) 134 --chain-length The length of each MCMC chain. This is only applicable to the Metropolis-Hastings and Collapsed-Gibbs methods. A longer chain will provide a better exploration of the posterior distribution but will also be slower.
115 default value: 2,000,000 135 default value: 2,000,000
116 136
117 --burn-in MCMC chain burn in (does not apply to Variational-Bayes) 137 --burn-in The number of samples to discard from the beginning of each MCMC chain. This is done to ensure that the chain has converged to the posterior distribution. This is only applicable to the Metropolis-Hastings and Collapsed-Gibbs methods.
118 default value: 1,000,000 138 default value: 1,000,000
119 139
120 --samples MCMC samples to draw (does not apply to Variational-Bayes) 140 --samples The number of samples to draw from each MCMC chain after the burn-in period. These samples are used to estimate the posterior distribution. This is only applicable to the Metropolis-Hastings and Collapsed-Gibbs methods.
121 default value: 1,000 141 default value: 1,000
122 142
123 --concentration_parameter 143 --concentration_parameter
124 The concentration parameter of the Dirichlet prior 144 The concentration parameter of the Dirichlet prior on the grid weights.
125 default value: 0.5 145 default value: 0.5
126 146
147 --non-zero Enforce non-zero synonymous rates on the grid. This is useful for calculating dN/dS ratios, as it prevents division by zero.
148
149 --kill-zero-lengths Automatically delete internal zero-length branches for computational efficiency. This will not affect the results.
127 150
128 ]]></help> 151 ]]></help>
129 <expand macro="citations"> 152 <expand macro="citations">
130 <citation type="doi">10.1093/molbev/mst030</citation> 153 <citation type="doi">10.1093/molbev/mst030</citation>
131 </expand> 154 </expand>