Mercurial > repos > iuc > hyphy_busted
diff hyphy_busted.xml @ 35:3169a46cd44c draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit d97b1b98a3a621c93a7ed9e7db16bda47eefcb92
| author | iuc |
|---|---|
| date | Tue, 07 Oct 2025 20:39:51 +0000 |
| parents | 966f51476bd7 |
| children |
line wrap: on
line diff
--- a/hyphy_busted.xml Thu Mar 02 15:07:52 2023 +0000 +++ b/hyphy_busted.xml Tue Oct 07 20:39:51 2025 +0000 @@ -7,56 +7,208 @@ <expand macro="requirements"/> <command detect_errors="exit_code"><![CDATA[ @SYMLINK_FILES@ - hyphy busted + ENV="TOLERATE_NUMERICAL_ERRORS=1;" hyphy busted --alignment ./$input_file @INPUT_TREE@ --code $gencodeid @branch_options@ --output '$busted_output' - @advanced_options@ + --syn-rates $advanced_options.syn_rates + --rates $advanced_options.rates + --grid-size $advanced_options.grid_size + --starting-points $advanced_options.starting_points + #if $advanced_options.multiple_hits != 'None': + --multiple-hits $advanced_options.multiple_hits + #end if + #if $advanced_options.error_sink: + --error-sink Yes + #end if + #if $advanced_options.save_alternative_model: + --save-fit alternative_model.nhx + #end if + #if $advanced_options.mss.enabled == "true": + --mss Yes + --mss-type $advanced_options.mss.mss_type_cond.mss_type + #if $advanced_options.mss.mss_type_cond.mss_type == "Random": + --mss-classes $advanced_options.mss.mss_type_cond.mss_classes + #elif $advanced_options.mss.mss_type_cond.mss_type == "Empirical": + --mss-file $advanced_options.mss.mss_type_cond.mss_file + #elif $advanced_options.mss.mss_type_cond.mss_type == "File": + --mss-file $advanced_options.mss.mss_type_cond.mss_file + --mss-neutral $advanced_options.mss.mss_type_cond.mss_neutral + #elif $advanced_options.mss.mss_type_cond.mss_type == "Codon-file": + --mss-file $advanced_options.mss.mss_type_cond.mss_file + --mss-neutral $advanced_options.mss.mss_type_cond.mss_neutral + #end if + #end if + --kill-zero-lengths $advanced_options.kill_zero_lengths + > busted_stdout.md @ERRORS@ ]]></command> <inputs> <expand macro="inputs"/> <expand macro="gencode"/> <expand macro="branches"/> - <expand macro="srv"/> + <section name="advanced_options" title="Advanced Options" expanded="false"> + <param argument="--syn-rates" type="integer" min="1" max="10" value="3" label="Synonymous 
rate classes" help="The number of synonymous rate classes to include in the model [1-10, default 3]"/> + <param argument="--rates" type="integer" min="2" max="10" value="3" label="Non-synonymous rate classes" help="The number of non-synonymous rate classes to include in the model [1-10, default 3]"/> + <param argument="--grid-size" type="integer" value="250" max="5000" label="Points in the initial distributional guess for likelihood fitting" help="The number of points in the initial distributional guess for likelihood fitting"/> + <param argument="--starting-points" type="integer" value="1" label="Initial random guesses to seed rate values optimization" help="The number of initial random guesses to seed rate values optimization" max="1000"/> + <param argument="--multiple-hits" type="select" label="Multiple hits correction" help="Include support for multiple nucleotide substitutions"> + <option value="None">None</option> + <option value="Double">Double</option> + <option value="Double+Triple">Double and Triple</option> + </param> + <param argument="--error-sink" type="boolean" truevalue="Yes" falsevalue="No" label="Include a rate class for misalignment artifacts (experimental)" help="Include a rate class to capture misalignment artifacts" checked="true"/> + <param name="save_alternative_model" type="boolean" label="Save alternative model fit" help="Save BUSTED model fit to this file (default is not to save)"/> + <conditional name="mss"> + <param name="enabled" type="select" label="Include support for multiple synonymous rate class substitutions"> + <option value="false" selected="true">No</option> + <option value="true">Yes</option> + </param> + <when value="false"/> + <when value="true"> + <conditional name="mss_type_cond"> + <param name="mss_type" type="select" label="How to partition synonymous codons into classes"> + <option value="Full">Each set of codons mapping to the same amino-acid class have a separate substitution rate (Valine == neutral)</option> + 
<option value="SynREV">Each set of codons mapping to the same amino-acid class have a separate substitution rate (mean = 1)</option> + <option value="SynREV2">Each pair of synonymous codons mapping to the same amino-acid class and separated by a transition have a separate substitution rate (no rate scaling))</option> + <option value="SynREV2g">Each pair of synonymous codons mapping to the same amino-acid class and separated by a transition have a separate substitution rate (Valine == neutral). All between-class synonymous substitutions share a rate.</option> + <option value="SynREVCodon">Each codon pair that is exchangeable gets its own substitution rate (fully estimated, mean = 1)</option> + <option value="Random">Random partition (specify how many classes; largest class = neutral)</option> + <option value="Empirical">Load a TSV file with an empirical rate estimate for each codon pair</option> + <option value="File">Load a TSV partition from file (prompted for neutral class)</option> + <option value="Codon-file">Load a TSV partition for pairs of codons from a file (prompted for neutral class)</option> + </param> + <when value="Full"/> + <when value="SynREV"/> + <when value="SynREV2"/> + <when value="SynREV2g"/> + <when value="SynREVCodon"/> + <when value="Random"> + <param name="mss_classes" type="integer" value="2" label="How many codon rate classes"/> + </when> + <when value="Empirical"> + <param name="mss_file" type="data" format="tabular" label="File defining empirical rates for each pair of codons"/> + </when> + <when value="File"> + <param name="mss_file" type="data" format="tabular" label="File defining the model partition"/> + <param name="mss_neutral" type="text" value="neutral" label="Designation for the neutral substitution rate"/> + </when> + <when value="Codon-file"> + <param name="mss_file" type="data" format="tabular" label="File defining the model partition for pairs of codons"/> + <param name="mss_neutral" type="text" value="neutral" 
label="Designation for the neutral substitution rate"/> + </when> + </conditional> + </when> + </conditional> + <expand macro="kill_zero_lengths_param"/> + </section> </inputs> <outputs> <data name="busted_output" format="hyphy_results.json" /> - <expand macro="alternative_model_output" /> + <data name="busted_md_report" format="markdown" from_work_dir="busted_stdout.md" label="BUSTED Report (Markdown) for ${tool.name} on ${on_string}" /> + <data name="alternative_model" format="nex" from_work_dir="alternative_model.nhx" label="${tool.name} on ${on_string}: Alternative model"> + <filter>advanced_options["save_alternative_model"]</filter> + </data> + </outputs> <tests> - <test> + <test expect_num_outputs="2"> <param name="input_file" ftype="fasta" value="absrel-in1.fa"/> <param name="input_nhx" ftype="nhx" value="absrel-in1.nhx"/> <output name="busted_output"> <assert_contents> - <has_size value="113683" delta="2000"/> <has_text text="substitutions"/> <has_text text="test results"/> </assert_contents> </output> + <output name="busted_md_report"> + <assert_contents> + <has_text text="For *test* branches, the following rate distribution for branch-site combinations was inferred"/> + </assert_contents> + </output> </test> - <test> + <test expect_num_outputs="3"> <param name="input_file" ftype="nex" value="busted-in2.nex"/> <conditional name="branch_cond"> - <param name="branch_sel" value="specify"/> - <param name="branch_label" value="B_US_90_WEAU160_BRANCH" /> + <param name="branch_sel" value="Internal"/> </conditional> - <param name="srv_options" value="specify" /> - <param name="save_alternative_model" value="true" /> + <section name="advanced_options"> + <param name="syn_rates" value="2"/> + <param name="rates" value="2"/> + <param name="save_alternative_model" value="true"/> + </section> <output name="busted_output"> <assert_contents> - <has_size value="112119" delta="2000"/> <has_text text="substitutions"/> <has_text text="test results"/> </assert_contents> 
</output> <output name="alternative_model"> <assert_contents> - <has_size value="333928" delta="2000"/> - <has_text text="END;"/> + <has_text text="BEGIN HYPHY;"/> + </assert_contents> + </output> + <output name="busted_md_report"> + <assert_contents> + <has_text text="For *test* branches, the following rate distribution for branch-site combinations was inferred"/> + <has_text text="Selected 5 branches to test in the BUSTED analysis"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="3"> + <param name="input_file" ftype="nex" value="busted-in2.nex"/> + <conditional name="branch_cond"> + <param name="branch_sel" value="Internal"/> + </conditional> + <section name="advanced_options"> + <param name="starting_points" value="5"/> + <param name="multiple_hits" value="Double"/> + <param name="error_sink" value="true"/> + <param name="save_alternative_model" value="true"/> + </section> + <output name="busted_output"> + <assert_contents> + <has_text text="test results"/> + </assert_contents> + </output> + <output name="alternative_model"> + <assert_contents> + <has_text text="BEGIN HYPHY;"/> + </assert_contents> + </output> + <output name="busted_md_report"> + <assert_contents> + <has_text text="rate at which 2 nucleotides are changed instantly within a single codon"/> + <has_text text="Error absorption"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="2"> + <param name="input_file" ftype="nex" value="busted-in2.nex"/> + <conditional name="branch_cond"> + <param name="branch_sel" value="Internal"/> + </conditional> + <section name="advanced_options"> + <conditional name="mss"> + <param name="enabled" value="true"/> + <conditional name="mss_type_cond"> + <param name="mss_type" value="Codon-file"/> + <param name="mss_file" value="mss.tsv"/> + <param name="mss_neutral" value="NEUTRAL"/> + </conditional> + </conditional> + </section> + <output name="busted_output"> + <assert_contents> + <has_text text="test results"/> + 
</assert_contents> + </output> + <output name="busted_md_report"> + <assert_contents> + <has_text text=">mss-neutral => NEUTRAL"/> + <has_text text="## Branch-site unrestricted statistical test of episodic diversification [BUSTED]"/> </assert_contents> </output> </test> @@ -84,14 +236,16 @@ Brief description ----------------- -BUSTED (branch-site unrestricted statistical test of episodic -diversification) uses a random effects branch-site model fitted jointly -to all or a subset of tree branches in order to test for alignment-wide -evidence of episodic diversifying selection. Assuming there is evidence -of positive selection (i.e. there is an omega > 1), BUSTED will also -perform a quick evidence-ratio style analysis to explore which -individual sites may have been subject to selection. +BUSTED (Branch-site Unrestricted Statistical Test of Episodic Diversification) is a powerful tool for detecting gene-wide evidence of episodic positive selection. It works by fitting a codon model to the data and comparing a null model, which does not allow for positive selection, to an alternative model that does. If the alternative model provides a statistically significantly better fit to the data, then we can conclude that there is evidence for positive selection. + +The core of BUSTED is a random effects branch-site model. This model allows the selection pressure (represented by the omega ratio, dN/dS) to vary both among sites in the alignment and across branches in the phylogenetic tree. The model includes three rate classes for omega: one for negative/purifying selection (omega < 1), one for neutral evolution (omega = 1), and one for positive/diversifying selection (omega > 1). +BUSTED tests for positive selection by comparing a constrained model (where omega is not allowed to be greater than 1) to an unconstrained model (where omega can be greater than 1). A likelihood ratio test is used to determine if the unconstrained model is a significantly better fit to the data. 
If it is, then there is evidence for positive selection acting on the gene. + +MSS Methodology +--------------- + +BUSTED can also incorporate models of selection on synonymous substitutions (MSS models). This is a new comparative framework for estimating selection on synonymous substitutions. These models account for selection by partitioning synonymous substitutions into multiple classes and estimating relative substitution rates for each, while also considering confounders like mutation bias. This framework allows for the study of selection on synonymous substitutions in diverse taxa without prior assumptions about the driving forces. For more information, please see the source publication: https://pubmed.ncbi.nlm.nih.gov/40129111/ Input ----- @@ -125,6 +279,10 @@ --code Which genetic code to use + --alignment An in-frame codon alignment in one of the formats supported by HyPhy. + + --tree A phylogenetic tree (optionally annotated with {}). + --branches Which branches should be tested for selection? All [default] : test all branches @@ -137,6 +295,49 @@ Unlabeled: if the Newick string is labeled using the {} notation, test only branches without explicit labels (see http://hyphy.org/tutorials/phylotree/) + + --kill-zero-lengths Automatically delete internal zero-length branches for computational efficiency. + + Advanced parameters + ................... + + --srv Include synonymous rate variation in the model. + + --grid-size The number of points in the initial distributional guess for likelihood fitting. + + --starting-points The number of initial random guesses to seed rate values optimization. + + --syn-rates The number of synonymous rate classes to include in the model [1-10, default 3]. + + --rates The number of non-synonymous rate classes to include in the model [2-10, default 3]. + + --multiple-hits Include support for multiple nucleotide substitutions. + None: No correction. + Double: Allow double substitutions. 
+ Double+Triple: Allow double and triple substitutions. + + --error-sink [Advanced experimental setting] Include a rate class to capture misalignment artifacts. + + --mss Include support for multiple synonymous rate class substitutions. + + --mss-type How to partition synonymous codons into classes. + Full: Each set of codons mapping to the same amino-acid class have a separate substitution rate (Valine == neutral) + SynREV: Each set of codons mapping to the same amino-acid class have a separate substitution rate (mean = 1) + SynREV2: Each pair of synonymous codons mapping to the same amino-acid class and separated by a transition have a separate substitution rate (no rate scaling) + SynREV2g: Each pair of synonymous codons mapping to the same amino-acid class and separated by a transition have a separate substitution rate (Valine == neutral). All between-class synonymous substitutions share a rate. + SynREVCodon: Each codon pair that is exchangeable gets its own substitution rate (fully estimated, mean = 1) + Random: Random partition (specify how many classes; largest class = neutral) + Empirical: Load a TSV file with an empirical rate estimate for each codon pair + File: Load a TSV partition from file (prompted for neutral class) + Codon-file: Load a TSV partition for pairs of codons from a file (prompted for neutral class) + + --mss-file File defining the model partition. + + --mss-reference-rate Normalize relative to these rates. + + --mss-classes How many codon rate classes. + + --mss-neutral Designation for the neutral substitution rate. ]]> </help> <expand macro="citations">
