view hyphy_fubar.xml @ 4:02ba486058f2 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit b'3845ccc8f689315262a6df56111bcf191c20d136\n'-dirty
author rdvelazquez
date Tue, 20 Nov 2018 10:18:58 -0500
parents fa2236bc5ab2
children 203597c4875e
line wrap: on
line source

<?xml version="1.0"?>
<tool id="hyphy_fubar" name="HyPhy-FUBAR" version="@VERSION@+galaxy0">
    <!-- Short tagline describing the tool; it expands the FUBAR acronym. -->
    <description>: Fast Unconstrained Bayesian AppRoximation</description>
    <!-- Shared definitions are imported from macros.xml (not part of this file).
         NOTE(review): the @VERSION@ and @HYPHY_INVOCATION@ tokens and the
         "requirements" / "citations" macros used in this tool are presumably
         defined there; confirm against macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Version probe: keeps the last line of HYPHYMP's version output and
         prints its first whitespace-separated field. -->
    <version_command>HYPHYMP --version | tail -n 1 | awk '{print $1}'</version_command>
    <!-- Job script for FUBAR.
         1. Symlinks the two input datasets to fixed names in the job working
            directory, so the result file name is predictable (the JSON output
            is collected as fubar_input.fa.FUBAR.json).
         2. Writes the answers for the FUBAR batch file to tool_params, one per
            line, in this order: genetic code id, alignment path, tree path,
            grid points, number of MCMC chains, chain length, burn-in samples,
            samples per chain, Dirichlet concentration.
         3. Runs FUBAR.bf through @HYPHY_INVOCATION@ and redirects stdout to the
            log output. NOTE(review): @HYPHY_INVOCATION@ is defined in
            macros.xml; presumably it feeds tool_params to HyPhy - confirm.
         Every substituted value and both working-directory paths are quoted so
         the shell never word-splits or glob-expands them. -->
    <command detect_errors="exit_code">
    <![CDATA[
    ln -s '$input_file' fubar_input.fa &&
    ln -s '$input_nhx' fubar_input.nhx &&
    echo '$gencodeid' > tool_params &&
    echo "`pwd`/fubar_input.fa" >> tool_params &&
    echo "`pwd`/fubar_input.nhx" >> tool_params &&
    echo '$grid_points' >> tool_params &&
    echo '$mcmc' >> tool_params &&
    echo '$chain_length' >> tool_params &&
    echo '$samples' >> tool_params &&
    echo '$samples_per_chain' >> tool_params &&
    echo '$concentration' >> tool_params &&
    @HYPHY_INVOCATION@ \$HYPHY_LIB/TemplateBatchFiles/SelectionAnalyses/FUBAR.bf > '$fubar_log'
    ]]>
    </command>
    <inputs>
        <!-- Input datasets; the command section symlinks them to
             fubar_input.fa and fubar_input.nhx respectively. -->
        <param name="input_file" type="data" format="fasta" label="Input FASTA file" />
        <param name="input_nhx" type="data" format="nhx" label="Input newick file" />
        <!-- Genetic code selector; the chosen numeric value is written as the
             first line of tool_params. -->
        <param name="gencodeid" type="select" label="Genetic code">
            <option value="1">Universal code</option>
            <option value="2">Vertebrate mitochondrial DNA code</option>
            <option value="3">Yeast mitochondrial DNA code</option>
            <option value="4">Mold, Protozoan and Coelenterate mt; Mycoplasma/Spiroplasma</option>
            <option value="5">Invertebrate mitochondrial DNA code</option>
            <option value="6">Ciliate, Dasycladacean and Hexamita Nuclear code</option>
            <option value="7">Echinoderm mitochondrial DNA code</option>
            <option value="8">Euplotid Nuclear code</option>
            <option value="9">Alternative Yeast Nuclear code</option>
            <option value="10">Ascidian mitochondrial DNA code</option>
            <option value="11">Flatworm mitochondrial DNA code</option>
            <option value="12">Blepharisma Nuclear code</option>
        </param>
        <!-- Numeric settings below are written to tool_params in the order they
             are declared here; Galaxy enforces the min/max bounds in the form. -->
        <param name="grid_points" type="integer" value="20" min="5" max="50" label="Grid points" />
        <param name="mcmc" type="integer" value="5" min="2" max="20" label="Number of MCMC chains" />
        <param name="chain_length" type="integer" value="2000000" min="500000" max="50000000" label="Length of each chain" />
        <!-- NOTE(review): the burn-in maximum (1900000) can exceed the minimum
             chain length (500000); nothing here ties the two values together. -->
        <param name="samples" type="integer" value="1000000" min="100000" max="1900000" label="Samples to use for burn-in" />
        <param name="samples_per_chain" type="integer" value="100" min="50" max="1000000" label="Samples to draw from each chain" />
        <param name="concentration" type="float" value="0.5" min="0.001" max="1" label="Concentration parameter of the Dirichlet prior" />
    </inputs>
    <outputs>
        <!-- Receives the standard output of the HyPhy run (the command section
             redirects it into this dataset). -->
        <data name="fubar_log" format="txt" />
        <!-- Collected from the job working directory. The file name is the
             symlinked alignment name (fubar_input.fa) plus ".FUBAR.json", so it
             must stay in sync with the symlink name used in the command. -->
        <data name="fubar_output" format="json" from_work_dir="fubar_input.fa.FUBAR.json" />
    </outputs>
    <tests>
        <!-- Single test case: only the two input datasets are set, so every
             other parameter runs with its declared default value. -->
        <test>
            <param name="input_file" ftype="fasta" value="fubar-in1.fa" />
            <param name="input_nhx" ftype="nhx" value="fubar-in1.nhx" />
            <!-- The JSON result is compared by size (sim_size), not by content.
                 NOTE(review): presumably because the MCMC output is not
                 byte-reproducible between runs; confirm. The log output is not
                 checked. -->
            <output name="fubar_output" file="fubar-out1.json" compare="sim_size" />
        </test>
    </tests>
    <help>
        <![CDATA[
Model-based selection analyses (such as those performed by PAML and HyPhy) can be slow, becoming impractical for large alignments. We present a method to model and detect selection much faster than existing methods and to leverage Bayesian MCMC to robustly account for parameter estimation errors.

Results: By exploiting some commonly used approximations, FUBAR can perform detection of positive selection under a model that allows rich site-to-site rate variation about 30 to 50 times faster than existing random effects likelihood methods, and 10 to 30 times faster than existing fixed effects likelihood methods. We introduce an ultra-fast MCMC routine that allows a flexible prior specification, with no parametric constraints on the prior shape. Furthermore, our method allows us to visualize Bayesian inference for each site, revealing the model supported by the data.
        ]]>
    </help>
    <!-- Expands the shared "citations" macro (defined outside this file) and
         passes the tool-specific DOI below as its nested content.
         NOTE(review): presumably the macro inserts this citation at its yield
         point alongside a common HyPhy citation; confirm in macros.xml. -->
    <expand macro="citations">
        <citation type="doi">10.1093/molbev/mst030</citation>
    </expand>
</tool>