Mercurial > repos > iuc > hyphy_busted
comparison hyphy_busted.xml @ 35:3169a46cd44c draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92
| author | iuc |
|---|---|
| date | Tue, 07 Oct 2025 20:39:51 +0000 |
| parents | 966f51476bd7 |
| children |
comparison
equal
deleted
inserted
replaced
| 34:966f51476bd7 | 35:3169a46cd44c |
|---|---|
| 5 </macros> | 5 </macros> |
| 6 <expand macro="bio_tools"/> | 6 <expand macro="bio_tools"/> |
| 7 <expand macro="requirements"/> | 7 <expand macro="requirements"/> |
| 8 <command detect_errors="exit_code"><![CDATA[ | 8 <command detect_errors="exit_code"><![CDATA[ |
| 9 @SYMLINK_FILES@ | 9 @SYMLINK_FILES@ |
| 10 hyphy busted | 10 ENV="TOLERATE_NUMERICAL_ERRORS=1;" hyphy busted |
| 11 --alignment ./$input_file | 11 --alignment ./$input_file |
| 12 @INPUT_TREE@ | 12 @INPUT_TREE@ |
| 13 --code $gencodeid | 13 --code $gencodeid |
| 14 @branch_options@ | 14 @branch_options@ |
| 15 --output '$busted_output' | 15 --output '$busted_output' |
| 16 @advanced_options@ | 16 --syn-rates $advanced_options.syn_rates |
| 17 --rates $advanced_options.rates | |
| 18 --grid-size $advanced_options.grid_size | |
| 19 --starting-points $advanced_options.starting_points | |
| 20 #if $advanced_options.multiple_hits != 'None': | |
| 21 --multiple-hits $advanced_options.multiple_hits | |
| 22 #end if | |
| 23 #if $advanced_options.error_sink: | |
| 24 --error-sink Yes | |
| 25 #end if | |
| 26 #if $advanced_options.save_alternative_model: | |
| 27 --save-fit alternative_model.nhx | |
| 28 #end if | |
| 29 #if $advanced_options.mss.enabled == "true": | |
| 30 --mss Yes | |
| 31 --mss-type $advanced_options.mss.mss_type_cond.mss_type | |
| 32 #if $advanced_options.mss.mss_type_cond.mss_type == "Random": | |
| 33 --mss-classes $advanced_options.mss.mss_type_cond.mss_classes | |
| 34 #elif $advanced_options.mss.mss_type_cond.mss_type == "Empirical": | |
| 35 --mss-file $advanced_options.mss.mss_type_cond.mss_file | |
| 36 #elif $advanced_options.mss.mss_type_cond.mss_type == "File": | |
| 37 --mss-file $advanced_options.mss.mss_type_cond.mss_file | |
| 38 --mss-neutral $advanced_options.mss.mss_type_cond.mss_neutral | |
| 39 #elif $advanced_options.mss.mss_type_cond.mss_type == "Codon-file": | |
| 40 --mss-file $advanced_options.mss.mss_type_cond.mss_file | |
| 41 --mss-neutral $advanced_options.mss.mss_type_cond.mss_neutral | |
| 42 #end if | |
| 43 #end if | |
| 44 --kill-zero-lengths $advanced_options.kill_zero_lengths | |
| 45 > busted_stdout.md | |
| 17 @ERRORS@ | 46 @ERRORS@ |
| 18 ]]></command> | 47 ]]></command> |
| 19 <inputs> | 48 <inputs> |
| 20 <expand macro="inputs"/> | 49 <expand macro="inputs"/> |
| 21 <expand macro="gencode"/> | 50 <expand macro="gencode"/> |
| 22 <expand macro="branches"/> | 51 <expand macro="branches"/> |
| 23 <expand macro="srv"/> | 52 <section name="advanced_options" title="Advanced Options" expanded="false"> |
| 53 <param argument="--syn-rates" type="integer" min="1" max="10" value="3" label="Synonymous rate classes" help="The number of synonymous rate classes to include in the model [1-10, default 3]"/> | |
| 54 <param argument="--rates" type="integer" min="2" max="10" value="3" label="Non-synonymous rate classes" help="The number of non-synonymous rate classes to include in the model [2-10, default 3]"/> | |
| 55 <param argument="--grid-size" type="integer" value="250" max="5000" label="Points in the initial distributional guess for likelihood fitting" help="The number of points in the initial distributional guess for likelihood fitting"/> | |
| 56 <param argument="--starting-points" type="integer" value="1" label="Initial random guesses to seed rate values optimization" help="The number of initial random guesses to seed rate values optimization" max="1000"/> | |
| 57 <param argument="--multiple-hits" type="select" label="Multiple hits correction" help="Include support for multiple nucleotide substitutions"> | |
| 58 <option value="None">None</option> | |
| 59 <option value="Double">Double</option> | |
| 60 <option value="Double+Triple">Double and Triple</option> | |
| 61 </param> | |
| 62 <param argument="--error-sink" type="boolean" truevalue="Yes" falsevalue="No" label="Include a rate class for misalignment artifacts (experimental)" help="Include a rate class to capture misalignment artifacts" checked="true"/> | |
| 63 <param name="save_alternative_model" type="boolean" label="Save alternative model fit" help="Save BUSTED model fit to this file (default is not to save)"/> | |
| 64 <conditional name="mss"> | |
| 65 <param name="enabled" type="select" label="Include support for multiple synonymous rate class substitutions"> | |
| 66 <option value="false" selected="true">No</option> | |
| 67 <option value="true">Yes</option> | |
| 68 </param> | |
| 69 <when value="false"/> | |
| 70 <when value="true"> | |
| 71 <conditional name="mss_type_cond"> | |
| 72 <param name="mss_type" type="select" label="How to partition synonymous codons into classes"> | |
| 73 <option value="Full">Each set of codons mapping to the same amino-acid class have a separate substitution rate (Valine == neutral)</option> | |
| 74 <option value="SynREV">Each set of codons mapping to the same amino-acid class have a separate substitution rate (mean = 1)</option> | |
| 75 <option value="SynREV2">Each pair of synonymous codons mapping to the same amino-acid class and separated by a transition have a separate substitution rate (no rate scaling)</option> | |
| 76 <option value="SynREV2g">Each pair of synonymous codons mapping to the same amino-acid class and separated by a transition have a separate substitution rate (Valine == neutral). All between-class synonymous substitutions share a rate.</option> | |
| 77 <option value="SynREVCodon">Each codon pair that is exchangeable gets its own substitution rate (fully estimated, mean = 1)</option> | |
| 78 <option value="Random">Random partition (specify how many classes; largest class = neutral)</option> | |
| 79 <option value="Empirical">Load a TSV file with an empirical rate estimate for each codon pair</option> | |
| 80 <option value="File">Load a TSV partition from file (prompted for neutral class)</option> | |
| 81 <option value="Codon-file">Load a TSV partition for pairs of codons from a file (prompted for neutral class)</option> | |
| 82 </param> | |
| 83 <when value="Full"/> | |
| 84 <when value="SynREV"/> | |
| 85 <when value="SynREV2"/> | |
| 86 <when value="SynREV2g"/> | |
| 87 <when value="SynREVCodon"/> | |
| 88 <when value="Random"> | |
| 89 <param name="mss_classes" type="integer" value="2" label="How many codon rate classes"/> | |
| 90 </when> | |
| 91 <when value="Empirical"> | |
| 92 <param name="mss_file" type="data" format="tabular" label="File defining empirical rates for each pair of codons"/> | |
| 93 </when> | |
| 94 <when value="File"> | |
| 95 <param name="mss_file" type="data" format="tabular" label="File defining the model partition"/> | |
| 96 <param name="mss_neutral" type="text" value="neutral" label="Designation for the neutral substitution rate"/> | |
| 97 </when> | |
| 98 <when value="Codon-file"> | |
| 99 <param name="mss_file" type="data" format="tabular" label="File defining the model partition for pairs of codons"/> | |
| 100 <param name="mss_neutral" type="text" value="neutral" label="Designation for the neutral substitution rate"/> | |
| 101 </when> | |
| 102 </conditional> | |
| 103 </when> | |
| 104 </conditional> | |
| 105 <expand macro="kill_zero_lengths_param"/> | |
| 106 </section> | |
| 24 </inputs> | 107 </inputs> |
| 25 <outputs> | 108 <outputs> |
| 26 <data name="busted_output" format="hyphy_results.json" /> | 109 <data name="busted_output" format="hyphy_results.json" /> |
| 27 <expand macro="alternative_model_output" /> | 110 <data name="busted_md_report" format="markdown" from_work_dir="busted_stdout.md" label="BUSTED Report (Markdown) for ${tool.name} on ${on_string}" /> |
| 111 <data name="alternative_model" format="nex" from_work_dir="alternative_model.nhx" label="${tool.name} on ${on_string}: Alternative model"> | |
| 112 <filter>advanced_options["save_alternative_model"]</filter> | |
| 113 </data> | |
| 114 | |
| 28 </outputs> | 115 </outputs> |
| 29 <tests> | 116 <tests> |
| 30 <test> | 117 <test expect_num_outputs="2"> |
| 31 <param name="input_file" ftype="fasta" value="absrel-in1.fa"/> | 118 <param name="input_file" ftype="fasta" value="absrel-in1.fa"/> |
| 32 <param name="input_nhx" ftype="nhx" value="absrel-in1.nhx"/> | 119 <param name="input_nhx" ftype="nhx" value="absrel-in1.nhx"/> |
| 33 <output name="busted_output"> | 120 <output name="busted_output"> |
| 34 <assert_contents> | 121 <assert_contents> |
| 35 <has_size value="113683" delta="2000"/> | |
| 36 <has_text text="substitutions"/> | 122 <has_text text="substitutions"/> |
| 37 <has_text text="test results"/> | 123 <has_text text="test results"/> |
| 38 </assert_contents> | 124 </assert_contents> |
| 39 </output> | 125 </output> |
| 126 <output name="busted_md_report"> | |
| 127 <assert_contents> | |
| 128 <has_text text="For *test* branches, the following rate distribution for branch-site combinations was inferred"/> | |
| 129 </assert_contents> | |
| 130 </output> | |
| 40 </test> | 131 </test> |
| 41 <test> | 132 <test expect_num_outputs="3"> |
| 42 <param name="input_file" ftype="nex" value="busted-in2.nex"/> | 133 <param name="input_file" ftype="nex" value="busted-in2.nex"/> |
| 43 <conditional name="branch_cond"> | 134 <conditional name="branch_cond"> |
| 44 <param name="branch_sel" value="specify"/> | 135 <param name="branch_sel" value="Internal"/> |
| 45 <param name="branch_label" value="B_US_90_WEAU160_BRANCH" /> | |
| 46 </conditional> | 136 </conditional> |
| 47 <param name="srv_options" value="specify" /> | 137 <section name="advanced_options"> |
| 48 <param name="save_alternative_model" value="true" /> | 138 <param name="syn_rates" value="2"/> |
| 139 <param name="rates" value="2"/> | |
| 140 <param name="save_alternative_model" value="true"/> | |
| 141 </section> | |
| 49 <output name="busted_output"> | 142 <output name="busted_output"> |
| 50 <assert_contents> | 143 <assert_contents> |
| 51 <has_size value="112119" delta="2000"/> | |
| 52 <has_text text="substitutions"/> | 144 <has_text text="substitutions"/> |
| 53 <has_text text="test results"/> | 145 <has_text text="test results"/> |
| 54 </assert_contents> | 146 </assert_contents> |
| 55 </output> | 147 </output> |
| 56 <output name="alternative_model"> | 148 <output name="alternative_model"> |
| 57 <assert_contents> | 149 <assert_contents> |
| 58 <has_size value="333928" delta="2000"/> | 150 <has_text text="BEGIN HYPHY;"/> |
| 59 <has_text text="END;"/> | 151 </assert_contents> |
| 152 </output> | |
| 153 <output name="busted_md_report"> | |
| 154 <assert_contents> | |
| 155 <has_text text="For *test* branches, the following rate distribution for branch-site combinations was inferred"/> | |
| 156 <has_text text="Selected 5 branches to test in the BUSTED analysis"/> | |
| 157 </assert_contents> | |
| 158 </output> | |
| 159 </test> | |
| 160 <test expect_num_outputs="3"> | |
| 161 <param name="input_file" ftype="nex" value="busted-in2.nex"/> | |
| 162 <conditional name="branch_cond"> | |
| 163 <param name="branch_sel" value="Internal"/> | |
| 164 </conditional> | |
| 165 <section name="advanced_options"> | |
| 166 <param name="starting_points" value="5"/> | |
| 167 <param name="multiple_hits" value="Double"/> | |
| 168 <param name="error_sink" value="true"/> | |
| 169 <param name="save_alternative_model" value="true"/> | |
| 170 </section> | |
| 171 <output name="busted_output"> | |
| 172 <assert_contents> | |
| 173 <has_text text="test results"/> | |
| 174 </assert_contents> | |
| 175 </output> | |
| 176 <output name="alternative_model"> | |
| 177 <assert_contents> | |
| 178 <has_text text="BEGIN HYPHY;"/> | |
| 179 </assert_contents> | |
| 180 </output> | |
| 181 <output name="busted_md_report"> | |
| 182 <assert_contents> | |
| 183 <has_text text="rate at which 2 nucleotides are changed instantly within a single codon"/> | |
| 184 <has_text text="Error absorption"/> | |
| 185 </assert_contents> | |
| 186 </output> | |
| 187 </test> | |
| 188 <test expect_num_outputs="2"> | |
| 189 <param name="input_file" ftype="nex" value="busted-in2.nex"/> | |
| 190 <conditional name="branch_cond"> | |
| 191 <param name="branch_sel" value="Internal"/> | |
| 192 </conditional> | |
| 193 <section name="advanced_options"> | |
| 194 <conditional name="mss"> | |
| 195 <param name="enabled" value="true"/> | |
| 196 <conditional name="mss_type_cond"> | |
| 197 <param name="mss_type" value="Codon-file"/> | |
| 198 <param name="mss_file" value="mss.tsv"/> | |
| 199 <param name="mss_neutral" value="NEUTRAL"/> | |
| 200 </conditional> | |
| 201 </conditional> | |
| 202 </section> | |
| 203 <output name="busted_output"> | |
| 204 <assert_contents> | |
| 205 <has_text text="test results"/> | |
| 206 </assert_contents> | |
| 207 </output> | |
| 208 <output name="busted_md_report"> | |
| 209 <assert_contents> | |
| 210 <has_text text=">mss-neutral => NEUTRAL"/> | |
| 211 <has_text text="## Branch-site unrestricted statistical test of episodic diversification [BUSTED]"/> | |
| 60 </assert_contents> | 212 </assert_contents> |
| 61 </output> | 213 </output> |
| 62 </test> | 214 </test> |
| 63 </tests> | 215 </tests> |
| 64 <help><![CDATA[ | 216 <help><![CDATA[ |
| 82 2. Testing small or low-divergence alignments (i.e. ~30 sequences) for evidence of positive diversifying selection, where neither branch nor site level methods have sufficient power. | 234 2. Testing small or low-divergence alignments (i.e. ~30 sequences) for evidence of positive diversifying selection, where neither branch nor site level methods have sufficient power. |
| 83 | 235 |
| 84 Brief description | 236 Brief description |
| 85 ----------------- | 237 ----------------- |
| 86 | 238 |
| 87 BUSTED (branch-site unrestricted statistical test of episodic | 239 BUSTED (Branch-site Unrestricted Statistical Test for Episodic Diversification) is a powerful tool for detecting gene-wide evidence of episodic positive selection. It works by fitting a codon model to the data and comparing a null model, which does not allow for positive selection, to an alternative model that does. If the alternative model provides a statistically significant better fit to the data, then we can conclude that there is evidence for positive selection. |
| 88 diversification) uses a random effects branch-site model fitted jointly | 240 |
| 89 to all or a subset of tree branches in order to test for alignment-wide | 241 The core of BUSTED is a random effects branch-site model. This model allows the selection pressure (represented by the omega ratio, dN/dS) to vary both among sites in the alignment and across branches in the phylogenetic tree. By default, the model includes three rate classes for omega: one for negative/purifying selection (omega < 1), one for neutral evolution (omega = 1), and one for positive/diversifying selection (omega > 1). |
| 90 evidence of episodic diversifying selection. Assuming there is evidence | 242 |
| 91 of positive selection (i.e. there is an omega > 1), BUSTED will also | 243 BUSTED tests for positive selection by comparing a constrained model (where omega is not allowed to be greater than 1) to an unconstrained model (where omega can be greater than 1). A likelihood ratio test is used to determine if the unconstrained model is a significantly better fit to the data. If it is, then there is evidence for positive selection acting on the gene. |
| 92 perform a quick evidence-ratio style analysis to explore which | 244 |
| 93 individual sites may have been subject to selection. | 245 MSS Methodology |
| 94 | 246 --------------- |
| 247 | |
| 248 BUSTED can also incorporate models of selection on synonymous substitutions (MSS models). This is a new comparative framework for estimating selection on synonymous substitutions. These models account for selection by partitioning synonymous substitutions into multiple classes and estimating relative substitution rates for each, while also considering confounders like mutation bias. This framework allows for the study of selection on synonymous substitutions in diverse taxa without prior assumptions about the driving forces. For more information, please see the source publication: http://pubmed.ncbi.nlm.nih.gov/40129111/ | |
| 95 | 249 |
| 96 Input | 250 Input |
| 97 ----- | 251 ----- |
| 98 | 252 |
| 99 1. A *FASTA* sequence alignment. | 253 1. A *FASTA* sequence alignment. |
| 123 :: | 277 :: |
| 124 | 278 |
| 125 | 279 |
| 126 --code Which genetic code to use | 280 --code Which genetic code to use |
| 127 | 281 |
| 282 --alignment An in-frame codon alignment in one of the formats supported by HyPhy. | |
| 283 | |
| 284 --tree A phylogenetic tree (optionally annotated with {}). | |
| 285 | |
| 128 --branches Which branches should be tested for selection? | 286 --branches Which branches should be tested for selection? |
| 129 All [default] : test all branches | 287 All [default] : test all branches |
| 130 | 288 |
| 131 Internal : test only internal branches (suitable for | 289 Internal : test only internal branches (suitable for |
| 132 intra-host pathogen evolution for example, where terminal branches | 290 intra-host pathogen evolution for example, where terminal branches |
| 135 Leaves: test only terminal (leaf) branches | 293 Leaves: test only terminal (leaf) branches |
| 136 | 294 |
| 137 Unlabeled: if the Newick string is labeled using the {} notation, | 295 Unlabeled: if the Newick string is labeled using the {} notation, |
| 138 test only branches without explicit labels | 296 test only branches without explicit labels |
| 139 (see http://hyphy.org/tutorials/phylotree/) | 297 (see http://hyphy.org/tutorials/phylotree/) |
| 298 | |
| 299 --kill-zero-lengths Automatically delete internal zero-length branches for computational efficiency. | |
| 300 | |
| 301 Advanced parameters | |
| 302 ................... | |
| 303 | |
| 304 --srv Include synonymous rate variation in the model. | |
| 305 | |
| 306 --grid-size The number of points in the initial distributional guess for likelihood fitting. | |
| 307 | |
| 308 --starting-points The number of initial random guesses to seed rate values optimization. | |
| 309 | |
| 310 --syn-rates The number of synonymous rate classes to include in the model [1-10, default 3]. | |
| 311 | |
| 312 --rates The number of non-synonymous rate classes to include in the model [2-10, default 3]. | |
| 313 | |
| 314 --multiple-hits Include support for multiple nucleotide substitutions. | |
| 315 None: No correction. | |
| 316 Double: Allow double substitutions. | |
| 317 Double+Triple: Allow double and triple substitutions. | |
| 318 | |
| 319 --error-sink [Advanced experimental setting] Include a rate class to capture misalignment artifacts. | |
| 320 | |
| 321 --mss Include support for multiple synonymous rate class substitutions. | |
| 322 | |
| 323 --mss-type How to partition synonymous codons into classes. | |
| 324 Full: Each set of codons mapping to the same amino-acid class have a separate substitution rate (Valine == neutral) | |
| 325 SynREV: Each set of codons mapping to the same amino-acid class have a separate substitution rate (mean = 1) | |
| 326 SynREV2: Each pair of synonymous codons mapping to the same amino-acid class and separated by a transition have a separate substitution rate (no rate scaling) | |
| 327 SynREV2g: Each pair of synonymous codons mapping to the same amino-acid class and separated by a transition have a separate substitution rate (Valine == neutral). All between-class synonymous substitutions share a rate. | |
| 328 SynREVCodon: Each codon pair that is exchangeable gets its own substitution rate (fully estimated, mean = 1) | |
| 329 Random: Random partition (specify how many classes; largest class = neutral) | |
| 330 Empirical: Load a TSV file with an empirical rate estimate for each codon pair | |
| 331 File: Load a TSV partition from file (prompted for neutral class) | |
| 332 Codon-file: Load a TSV partition for pairs of codons from a file (prompted for neutral class) | |
| 333 | |
| 334 --mss-file File defining the model partition. | |
| 335 | |
| 336 --mss-reference-rate Normalize relative to these rates. | |
| 337 | |
| 338 --mss-classes How many codon rate classes. | |
| 339 | |
| 340 --mss-neutral Designation for the neutral substitution rate. | |
| 140 ]]> | 341 ]]> |
| 141 </help> | 342 </help> |
| 142 <expand macro="citations"> | 343 <expand macro="citations"> |
| 143 <citation type="doi">10.1093/molbev/msv035</citation> | 344 <citation type="doi">10.1093/molbev/msv035</citation> |
| 144 </expand> | 345 </expand> |
