Mercurial > repos > rdvelazquez > hyphy_fubar

<?xml version="1.0"?>
<tool id="hyphy_fubar" name="HyPhy-FUBAR" version="@VERSION@+galaxy0">
  <!-- Tool tagline. Kept on a single line: whitespace inside text content is
       data, so wrapping it would embed a newline and indentation in the
       description string. -->
  <description>: Fast Unconstrained Bayesian AppRoximation</description>
  <!-- Shared definitions are imported from macros.xml (not shown here). The
       @VERSION@ and @HYPHY_INVOCATION@ tokens used elsewhere in this file are
       presumably defined there; TODO confirm against macros.xml. -->
  <macros>
    <import>macros.xml</import>
  </macros>
  <!-- Expands the macro named "requirements"; its contents are defined outside
       this file. -->
  <expand macro="requirements"/>
  <!-- Version probe: takes the last line printed by the HYPHYMP version flag
       and keeps its first whitespace-separated field. The awk program must
       stay on one line: a newline between "print" and "$1" terminates the
       print statement, making awk emit the whole line instead of field 1. -->
  <version_command>HYPHYMP --version | tail -n 1 | awk '{print $1}'</version_command>
  <command detect_errors="exit_code"><![CDATA[
    ## Stage the inputs under fixed names so the JSON result lands at the
    ## path the fubar_output dataset collects (fubar_input.fa.FUBAR.json).
    ln -s '$input_file' fubar_input.fa &&
    ln -s '$input_nhx' fubar_input.nhx &&

    ## Build tool_params: one answer per line, in the order they were
    ## written by the original two branches of this template.
    echo '$gencodeid' > tool_params &&
    echo `pwd`/fubar_input.fa >> tool_params &&
    echo `pwd`/fubar_input.nhx >> tool_params &&
    echo '$grid_points' >> tool_params &&
    echo '$posteriorEstimationMethod' >> tool_params &&

    ## The chain settings are only written for the MCMC-based methods (1 and 2);
    ## Variational Bayes (3) goes straight to the concentration parameter.
    ## Select values are strings, so compare via str(): a bare comparison
    ## with the integer 3 never matches and would always emit these lines.
    #if str($posteriorEstimationMethod) != '3':
      echo '$mcmc' >> tool_params &&
      echo '$chain_length' >> tool_params &&
      echo '$samples' >> tool_params &&
      echo '$samples_per_chain' >> tool_params &&
    #end if

    echo '$concentration' >> tool_params &&
    @HYPHY_INVOCATION@ \$HYPHY_LIB/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf > '$fubar_log'
    ]]></command>
  <inputs>
    <!-- Input datasets: the alignment and its tree. The command symlinks them
         to fubar_input.fa and fubar_input.nhx before running FUBAR. -->
    <param name="input_file" type="data" format="fasta" label="Input FASTA file"/>
    <param name="input_nhx" type="data" format="nhx" label="Input newick file"/>

    <!-- Genetic code; the option value is written as the first line of
         tool_params. Option labels are kept on one line each so no
         indentation whitespace ends up inside the label text. -->
    <param name="gencodeid" type="select" label="Genetic code">
      <option value="1">Universal code</option>
      <option value="2">Vertebrate mitochondrial DNA code</option>
      <option value="3">Yeast mitochondrial DNA code</option>
      <option value="4">Mold, Protozoan and Coelenterate mt; Mycoplasma/Spiroplasma</option>
      <option value="5">Invertebrate mitochondrial DNA code</option>
      <option value="6">Ciliate, Dasycladacean and Hexamita Nuclear code</option>
      <option value="7">Echinoderm mitochondrial DNA code</option>
      <option value="8">Euplotid Nuclear code</option>
      <option value="9">Alternative Yeast Nuclear code</option>
      <option value="10">Ascidian mitochondrial DNA code</option>
      <option value="11">Flatworm mitochondrial DNA code</option>
      <option value="12">Blepharisma Nuclear code</option>
    </param>

    <!-- Posterior estimation method. The command template branches on
         value 3 (Variational Bayes) to decide whether the MCMC chain
         settings below are passed to FUBAR. -->
    <param name="posteriorEstimationMethod" type="select" label="Posterior estimation method">
      <option value="1">Metropolis-Hastings - Full Metropolis-Hastings MCMC algorithm (slowest, original 2013 paper implementation)</option>
      <option value="2">Collapsed Gibbs - Collapsed Gibbs sampler (intermediate speed)</option>
      <option value="3">Variational Bayes - 0-th order Variational Bayes approximations (fastest, recommended default)</option>
    </param>

    <param name="grid_points" type="integer" value="20" min="5" max="50" label="Grid points"/>

    <!-- MCMC chain settings; the command template treats these four as
         specific to the non-Variational-Bayes methods. -->
    <param name="mcmc" type="integer" value="5" min="2" max="20" label="Number of MCMC chains"/>
    <param name="chain_length" type="integer" value="2000000" min="500000" max="50000000" label="Length of each chain"/>
    <param name="samples" type="integer" value="1000000" min="100000" max="1900000" label="Samples to use for burn-in"/>
    <param name="samples_per_chain" type="integer" value="100" min="50" max="1000000" label="Samples to draw from each chain"/>

    <!-- Written to tool_params for every method. -->
    <param name="concentration" type="float" value="0.5" min="0.001" max="1" label="Concentration parameter of the Dirichlet prior"/>
  </inputs>
  <outputs>
    <!-- Receives the standard output of the FUBAR run; the command redirects
         the batch file's stdout into this dataset. -->
    <data name="fubar_log" format="txt"/>
    <!-- JSON result collected from the job working directory. The file name
         is the symlinked alignment name (fubar_input.fa) plus ".FUBAR.json". -->
    <data name="fubar_output" format="json" from_work_dir="fubar_input.fa.FUBAR.json"/>
  </outputs>
  <tests>
    <!-- Single test case: only the two input datasets are set, so no other
         parameter is overridden here. The JSON output is checked against
         fubar-out1.json using the sim_size comparison. -->
    <test>
      <param name="input_file" ftype="fasta" value="fubar-in1.fa"/>
      <param name="input_nhx" ftype="nhx" value="fubar-in1.nhx"/>
      <output name="fubar_output" file="fubar-out1.json" compare="sim_size"/>
    </test>
  </tests>
  <help><![CDATA[
Model-based selection analyses (such as those performed by PAML and HyPhy) can be slow, becoming impractical for large alignments. We present a method to model and detect selection much faster than existing methods and to leverage Bayesian MCMC to robustly account for parameter estimation errors.

Results: By exploiting some commonly used approximations, FUBAR can perform detection of positive selection under a model that allows rich site-to-site rate variation about 30 to 50 times faster than existing random effects likelihood methods, and 10 to 30 times faster than existing fixed effects likelihood methods. We introduce an ultra-fast MCMC routine that allows a flexible prior specification, with no parametric constraints on the prior shape. Furthermore, our method allows us to visualize Bayesian inference for each site, revealing the model supported by the data.
        ]]></help>
  <!-- Expands the macro named "citations", handing it this tool's DOI as
       content. How the macro places that content is defined outside this
       file. -->
  <expand macro="citations">
    <citation type="doi">10.1093/molbev/mst030</citation>
  </expand>
</tool>
author	rdvelazquez
date	Mon, 03 Dec 2018 15:00:30 -0500
parents	203597c4875e
children	b19b7712ab0a