Mercurial > repos > iuc > hyphy_fubar
comparison hyphy_fubar.xml @ 36:da919379e8e4 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92
| author | iuc |
|---|---|
| date | Tue, 07 Oct 2025 20:40:57 +0000 |
| parents | d44c0b7a6cb8 |
| children |
comparison
equal
deleted
inserted
replaced
| 35:d44c0b7a6cb8 | 36:da919379e8e4 |
|---|---|
| 11 hyphy fubar | 11 hyphy fubar |
| 12 --alignment ./$input_file | 12 --alignment ./$input_file |
| 13 @INPUT_TREE@ | 13 @INPUT_TREE@ |
| 14 --code '$gencodeid' | 14 --code '$gencodeid' |
| 15 --method '$posteriorEstimationMethod.method' | 15 --method '$posteriorEstimationMethod.method' |
| 16 --grid '$grid_points' | |
| 17 @posteriorEstimationMethod_cmd@ | 16 @posteriorEstimationMethod_cmd@ |
| 18 --concentration_parameter '$concentration' | 17 --grid '$advanced_options.grid_points' |
| 18 --concentration_parameter '$advanced_options.concentration' | |
| 19 --non-zero $advanced_options.non_zero | |
| 20 --kill-zero-lengths $advanced_options.kill_zero_lengths | |
| 21 > fubar_stdout.md | |
| 19 @ERRORS@ | 22 @ERRORS@ |
| 20 ]]></command> | 23 ]]></command> |
| 21 <inputs> | 24 <inputs> |
| 22 <expand macro="inputs"/> | 25 <expand macro="inputs"/> |
| 23 <expand macro="gencode"/> | 26 <expand macro="gencode"/> |
| 24 <param argument="--grid" name="grid_points" type="integer" value="20" min="5" max="50" label="Grid points" /> | |
| 25 <expand macro="conditional_posteriorEstimationMethod" /> | 27 <expand macro="conditional_posteriorEstimationMethod" /> |
| 26 <param argument="--concentration_parameter" name="concentration" type="float" value="0.5" min="0.001" max="1" label="Concentration parameter of the Dirichlet prior" /> | 28 <section name="advanced_options" title="Advanced Options" expanded="false"> |
| 29 <param argument="--grid" name="grid_points" type="integer" value="20" min="5" max="50" label="Grid points" help="The number of grid points used to approximate the posterior distribution of dN and dS." /> | |
| 30 <param argument="--concentration_parameter" name="concentration" type="float" value="0.5" min="0.001" max="1" label="Concentration parameter of the Dirichlet prior" help="The concentration parameter of the Dirichlet prior on the grid weights." /> | |
| 31 <param argument="--non-zero" type="boolean" truevalue="Yes" falsevalue="No" label="Enforce non-zero synonymous rates" help="Enforce non-zero synonymous rates on the grid. This is useful for calculating dN/dS ratios, as it prevents division by zero."/> | |
| 32 <expand macro="kill_zero_lengths_param"/> | |
| 33 </section> | |
| 27 | 34 |
| 28 </inputs> | 35 </inputs> |
| 29 <outputs> | 36 <outputs> |
| 30 <data name="fubar_output" format="hyphy_results.json" /> | 37 <data name="fubar_output" format="hyphy_results.json" /> |
| 38 <data name="fubar_md_report" format="markdown" from_work_dir="fubar_stdout.md" label="FUBAR Report (Markdown) for ${tool.name} on ${on_string}" /> | |
| 31 </outputs> | 39 </outputs> |
| 32 <tests> | 40 <tests> |
| 33 <test> | 41 <test expect_num_outputs="2"> |
| 34 <param name="input_file" ftype="fasta.gz" value="fubar-in1.fa.gz"/> | 42 <param name="input_file" ftype="fasta.gz" value="fubar-in1.fa.gz"/> |
| 35 <param name="input_nhx" ftype="nhx" value="fubar-in1.nhx"/> | 43 <param name="input_nhx" ftype="nhx" value="fubar-in1.nhx"/> |
| 36 <conditional name="posteriorEstimationMethod"> | 44 <conditional name="posteriorEstimationMethod"> |
| 37 <param name="method" value="Variational-Bayes"/> | 45 <param name="method" value="Variational-Bayes"/> |
| 38 </conditional> | 46 </conditional> |
| 39 <output name="fubar_output" file="fubar-out1.json" compare="sim_size"/> | 47 <output name="fubar_output"> |
| 48 <assert_contents> | |
| 49 <has_text text="Empiricial Bayes Factor for positive selection at a site"/> | |
| 50 </assert_contents> | |
| 51 </output> | |
| 52 <output name="fubar_md_report"> | |
| 53 <assert_contents> | |
| 54 <has_text text="Running an iterative zeroth order variational Bayes procedure to estimate the posterior mean of rate weights"/> | |
| 55 <has_text text="### Tabulating site-level results"/> | |
| 56 </assert_contents> | |
| 57 </output> | |
| 40 </test> | 58 </test> |
| 41 </tests> | 59 </tests> |
| 42 <help><![CDATA[ | 60 <help><![CDATA[ |
| 43 | 61 |
| 44 FUBAR : Fast, Unconstrained Bayesian AppRoximation | 62 FUBAR : Fast, Unconstrained Bayesian AppRoximation |
| 59 FUBAR is our recommended method for detecting pervasive selection at individual sites on large (> 500 sequences) datasets for which other methods have prohibitive runtimes, unless you have access to a computer cluster. | 77 FUBAR is our recommended method for detecting pervasive selection at individual sites on large (> 500 sequences) datasets for which other methods have prohibitive runtimes, unless you have access to a computer cluster. |
| 60 | 78 |
| 61 Brief description | 79 Brief description |
| 62 ----------------- | 80 ----------------- |
| 63 | 81 |
| 64 Perform a Fast Unbiased AppRoximate Bayesian (FUBAR) analysis of a | 82 FUBAR (Fast, Unconstrained Bayesian AppRoximation) is a Bayesian method for detecting site-specific positive and negative selection. It is designed to be fast and efficient, making it suitable for large datasets. |
| 65 coding sequence alignment to determine whether some sites have been | 83 |
| 66 subject to pervasive purifying or diversifying selection. There are three methods | 84 The core idea behind FUBAR is to model the non-synonymous (dN) and synonymous (dS) substitution rates at each site in a codon alignment. The ratio of these rates (dN/dS, or omega) is a measure of the selective pressure acting on a site. An omega value greater than 1 indicates positive (diversifying) selection, a value less than 1 indicates negative (purifying) selection, and a value of 1 indicates neutral evolution. |
| 67 for estimating the posterior distribution of | 85 |
| 68 grid weights: collapsed Gibbs MCMC (faster), 0-th order Variational | 86 FUBAR uses a Bayesian approach to infer the posterior distribution of dN and dS at each site. It does this by discretizing the dN and dS rates into a grid of points and then using a Bayesian graphical model to infer the posterior probability of each grid point for each site. This approach is much faster than traditional MCMC-based methods, which require long run times to converge. |
| 69 Bayes approximation (fastest), full Metropolis-Hastings (slowest). | 87 |
| 88 FUBAR offers three different methods for estimating the posterior distribution: | |
| 89 | |
| 90 * **Variational-Bayes:** A fast approximation method that is the recommended default. | |
| 91 * **Collapsed-Gibbs:** An MCMC method that is faster than Metropolis-Hastings (intermediate speed). | |
| 92 * **Metropolis-Hastings:** The original, slowest MCMC method. | |
| 70 | 93 |
| 71 Input | 94 Input |
| 72 ----- | 95 ----- |
| 73 | 96 |
| 74 1. A *FASTA* sequence alignment. | 97 1. A *FASTA* sequence alignment. |
| 95 :: | 118 :: |
| 96 | 119 |
| 97 | 120 |
| 98 --code Which genetic code to use | 121 --code Which genetic code to use |
| 99 | 122 |
| 100 --grid The number of grid points | 123 --grid The number of grid points used to approximate the posterior distribution of dN and dS. A larger grid will provide a more accurate approximation but will also be slower. The default value of 20 is a good compromise between speed and accuracy. |
| 101 Smaller : faster | |
| 102 Larger : more precise posterior estimation but slower | |
| 103 default value: 20 | |
| 104 | 124 |
| 105 --method Inference method to use | 125 --method The inference method to use for estimating the posterior distribution. |
| 106 Variational-Bayes : 0-th order Variational Bayes approximation; fastest [default] | 126 Variational-Bayes : 0-th order Variational Bayes approximation; fastest [default] |
| 107 Metropolis-Hastings : Full Metropolis-Hastings MCMC algorithm; original method [slowest] | 127 Metropolis-Hastings : Full Metropolis-Hastings MCMC algorithm; original method [slowest] |
| 108 Collapsed-Gibbs : Collapsed Gibbs sampler [intermediate speed] | 128 Collapsed-Gibbs : Collapsed Gibbs sampler [intermediate speed] |
| 109 | 129 |
| 110 | 130 |
| 111 --chains How many MCMC chains to run (does not apply to Variational-Bayes) | 131 --chains The number of MCMC chains to run. This is only applicable to the Metropolis-Hastings and Collapsed-Gibbs methods. A larger number of chains will provide a better exploration of the posterior distribution but will also be slower. |
| 112 default value: 5 | 132 default value: 5 |
| 113 | 133 |
| 114 --chain-length MCMC chain length (does not apply to Variational-Bayes) | 134 --chain-length The length of each MCMC chain. This is only applicable to the Metropolis-Hastings and Collapsed-Gibbs methods. A longer chain will provide a better exploration of the posterior distribution but will also be slower. |
| 115 default value: 2,000,000 | 135 default value: 2,000,000 |
| 116 | 136 |
| 117 --burn-in MCMC chain burn in (does not apply to Variational-Bayes) | 137 --burn-in The number of samples to discard from the beginning of each MCMC chain. This is done to ensure that the chain has converged to the posterior distribution. This is only applicable to the Metropolis-Hastings and Collapsed-Gibbs methods. |
| 118 default value: 1,000,000 | 138 default value: 1,000,000 |
| 119 | 139 |
| 120 --samples MCMC samples to draw (does not apply to Variational-Bayes) | 140 --samples The number of samples to draw from each MCMC chain after the burn-in period. These samples are used to estimate the posterior distribution. This is only applicable to the Metropolis-Hastings and Collapsed-Gibbs methods. |
| 121 default value: 1,000 | 141 default value: 1,000 |
| 122 | 142 |
| 123 --concentration_parameter | 143 --concentration_parameter |
| 124 The concentration parameter of the Dirichlet prior | 144 The concentration parameter of the Dirichlet prior on the grid weights. |
| 125 default value: 0.5 | 145 default value: 0.5 |
| 126 | 146 |
| 147 --non-zero Enforce non-zero synonymous rates on the grid. This is useful for calculating dN/dS ratios, as it prevents division by zero. | |
| 148 | |
| 149 --kill-zero-lengths Automatically delete internal zero-length branches for computational efficiency. This will not affect the results. | |
| 127 | 150 |
| 128 ]]></help> | 151 ]]></help> |
| 129 <expand macro="citations"> | 152 <expand macro="citations"> |
| 130 <citation type="doi">10.1093/molbev/mst030</citation> | 153 <citation type="doi">10.1093/molbev/mst030</citation> |
| 131 </expand> | 154 </expand> |
