view genetrack.xml @ 12:cd105fdfb0da draft

Uploaded
author greg
date Wed, 16 Dec 2015 12:22:27 -0500
parents 497e3274f70b
children ebafcd6c3e0e
line wrap: on
line source

<?xml version="1.0"?>
<tool id="genetrack" name="GeneTrack" version="@WRAPPER_VERSION@.0">
    <description>peak predictor</description>
    <macros>
        <import>genetrack_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        Runs genetrack.py on every selected dataset.  The script receives the
        chosen input format, one input option per dataset (the dataset file path
        followed by its history item number), and the five peak calling settings.
    -->
    <command><![CDATA[
        ## The script path is quoted so that an installation directory containing
        ## spaces is still passed to python as a single argument.
        python '$__tool_directory__/genetrack.py'
        --input_format $input_format_cond.input_format
        ## Each format has its own multiple-dataset parameter inside the conditional,
        ## so the loop reads whichever one matches the selected format.
        #if str($input_format_cond.input_format) == "scidx":
            #for $i in $input_format_cond.input_scidx:
                --input '${i}' '${i.hid}'
            #end for
        #elif str($input_format_cond.input_format) == "gff":
            #for $i in $input_format_cond.input_gff:
                --input '${i}' '${i.hid}'
            #end for
        #end if
        --sigma $sigma
        --exclusion $exclusion
        --up_width $up_width
        --down_width $down_width
        --filter $filter
    ]]></command>
    <inputs>
        <!-- The selected format decides which dataset parameter is used; both accept several datasets. -->
        <conditional name="input_format_cond">
            <param name="input_format"
                   type="select"
                   label="Format of files for conversion">
                <option value="scidx" selected="True">ScIdx</option>
                <option value="gff">Gff</option>
            </param>
            <when value="scidx">
                <param name="input_scidx"
                       type="data"
                       format="scidx"
                       multiple="True"
                       label="Predict peaks on" />
            </when>
            <when value="gff">
                <param name="input_gff"
                       type="data"
                       format="gff"
                       multiple="True"
                       label="Predict peaks on" />
            </when>
        </conditional>
        <!-- Peak calling settings; the command passes each one to the script as the option of the same name. -->
        <param name="sigma"
               type="integer"
               value="5"
               min="1"
               label="Sigma to use when smoothing reads"
               help="Higher values increase computation but produce more smoothing." />
        <param name="exclusion"
               type="integer"
               value="20"
               min="1"
               label="Peak exclusion zone"
               help="Exclusion zone around each peak that prevents others from being called." />
        <param name="up_width"
               type="integer"
               value="10"
               min="0"
               label="Exclusion zone of upstream called peaks" />
        <param name="down_width"
               type="integer"
               value="10"
               min="0"
               label="Exclusion zone of downstream called peaks" />
        <param name="filter"
               type="integer"
               value="3"
               min="0"
               label="Absolute read filter"
               help="Removes peaks with lower peak height." />
    </inputs>
    <outputs>
        <!--
            A single list collection.  Every file name in the "output" directory
            matches the pattern, and the whole name is captured as the element
            designation; the discovered datasets are typed as gff and are not
            shown individually in the history.
        -->
        <collection name="genetrack_output"
                    type="list"
                    label="Genetrack results on ${on_string}">
            <discover_datasets pattern="(?P&lt;designation&gt;.*)"
                               directory="output"
                               ext="gff"
                               visible="false" />
        </collection>
    </outputs>
    <tests>
        <!-- GFF input, every setting at its default value. -->
        <test>
            <param name="input_format" value="gff" />
            <param name="input_gff" value="genetrack_input2.gff" ftype="gff" />
            <param name="sigma" value="5" />
            <param name="exclusion" value="20" />
            <param name="up_width" value="10" />
            <param name="down_width" value="10" />
            <param name="filter" value="3" />
            <output_collection name="genetrack_output" type="list">
                <element name="s5e20u10d10F3_on_data_1"
                         file="genetrack_output2.gff"
                         ftype="gff" />
            </output_collection>
        </test>
        <!-- ScIdx input, every setting at its default value. -->
        <test>
            <param name="input_format" value="scidx" />
            <param name="input_scidx" value="genetrack_input3.scidx" ftype="scidx" />
            <param name="sigma" value="5" />
            <param name="exclusion" value="20" />
            <param name="up_width" value="10" />
            <param name="down_width" value="10" />
            <param name="filter" value="3" />
            <output_collection name="genetrack_output" type="list">
                <element name="s5e20u10d10F3_on_data_1"
                         file="genetrack_output3.gff"
                         ftype="gff" />
            </output_collection>
        </test>
        <!-- GFF input whose file name marks it as unsorted (presumably exercises unsorted records), default settings. -->
        <test>
            <param name="input_format" value="gff" />
            <param name="input_gff" value="genetrack_input_unsorted4.gff" ftype="gff" />
            <param name="sigma" value="5" />
            <param name="exclusion" value="20" />
            <param name="up_width" value="10" />
            <param name="down_width" value="10" />
            <param name="filter" value="3" />
            <output_collection name="genetrack_output" type="list">
                <element name="s5e20u10d10F3_on_data_1"
                         file="genetrack_output4.gff"
                         ftype="gff" />
            </output_collection>
        </test>
    </tests>
    <help>
**What it does**

<![CDATA[

GeneTrack separately identifies peaks on the forward “+” and reverse “-” strands.  GeneTrack works by
replacing each tag with a probabilistic distribution of occurrences for that tag at and around its mapped
genomic coordinate.  The distance decay of the probabilistic distribution is set by adjusting the value of the
tool's **Sigma to use when smoothing reads** parameter.  GeneTrack then sums the distribution over all mapped
tags.  This results in a smooth continuous trace that can be globally broadened or tightened by adjusting the
sigma value.  GeneTrack starts with the highest smoothed peak first, treating each strand separately if indicated
by the data, then sets up an exclusion zone (centered over the peak) defined by the value of the **Peak exclusion
zone** parameter (see figure).  The exclusion zone prevents any secondary peaks from being called on the same strand
within that exclusion zone.  In rare cases, it may be desirable to set different exclusion zones upstream (more 5’)
versus downstream (more 3’) of the peak.

]]>

.. image:: $PATH_TO_IMAGES/genetrack.png

-----

**Options**

* **Sigma to use when smoothing reads** - Smooths clusters of tags via a Gaussian distribution.
* **Peak exclusion zone** - Exclusion zone around each peak, eliminating all other peaks on the same strand that are within a ± bp distance of the peak.
* **Exclusion zone of upstream called peaks** - Defines the exclusion zone centered over peaks upstream of a peak.
* **Exclusion zone of downstream called peaks** - Defines the exclusion zone centered over peaks downstream of a peak.
* **Absolute read filter** - Restricts output to only peaks with larger peak height.
    </help>
    <expand macro="citations" />
</tool>