view filter_alignment.xml @ 0:c1bd0c560018 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime commit bcbe76277f3e60303faf826f8ce7f018bc663a9a-dirty
author bebatut
date Tue, 02 Feb 2016 05:50:37 -0500
parents
children
line wrap: on
line source

<tool id="qiime_filter_alignment" name="filter alignment" version="1.9.1galaxy1">
    <!-- Kept on a single line: text content is data, so a wrapped line would
         embed a newline and indentation in the displayed description. -->
    <description>Filter sequence alignment by removing highly variable regions</description>

    <!-- Pull in the macro definitions shared with the other wrappers from
         macros.xml (expected to sit next to this file). -->
    <macros>
        <import>macros.xml</import>
    </macros>

    <!-- Expands the "requirements" macro defined in macros.xml; its contents are
         not visible here (presumably the QIIME package requirement; confirm
         in macros.xml). -->
    <expand macro="requirements" />

    <!--
        Command line for filter_alignment.py.

        * The input dataset is symlinked to the fixed name "input.fasta" because
          the script derives its output file name from the input base name
          ("<basename>_pfiltered.fasta"). With a fixed input name the output path
          is predictable and can be collected through from_work_dir, which takes
          a literal path and does not expand shell globs.
        * Dataset paths are single-quoted so unusual characters in paths cannot
          break the shell command.
        * Optional numeric parameters are tested against the empty string instead
          of relying on truthiness, so a legitimate value of 0 is still passed on.
        * Boolean flags are emitted directly through truevalue / falsevalue.
    -->
    <command><![CDATA[
        ln -s '$input_fasta_file' input.fasta &&
        filter_alignment.py -i input.fasta -o filter_alignment_output
            #if str($lane_mask_fp) != 'None':
                -m '$lane_mask_fp'
            #end if

            $suppress_lane_mask_filter

            #if str($allowed_gap_frac) != '':
                -g $allowed_gap_frac
            #end if

            $remove_outliers

            #if str($threshold) != '':
                -t $threshold
            #end if

            #if str($entropy_threshold) != '':
                -e $entropy_threshold
            #end if
    ]]></command>

    <!--
        Tool parameters. Names are unchanged so existing workflows keep working.
        Defaults are declared with "value" (numeric) and "checked" (boolean),
        which are the attributes Galaxy reads. Long descriptions live in "help"
        so that attribute values stay on one line.
    -->
    <inputs>
        <param name="input_fasta_file" type="data" format="fasta" optional="false"
            label="Input alignment"
            help="-i/--input_fasta_file: the input fasta file containing the alignment"/>
        <param name="lane_mask_fp" type="data" format="txt" optional="true"
            label="Lane mask file"
            help="-m/--lane_mask_fp: path to lane mask file [default: 16S alignment lane mask (Lane, D.J. 1991)]"/>
        <param name="suppress_lane_mask_filter" type="boolean" checked="false"
            truevalue="-s" falsevalue=""
            label="Suppress lane mask filtering?"
            help="-s/--suppress_lane_mask_filter: suppress lane mask filtering [default: False]"/>
        <param name="allowed_gap_frac" type="float" value="0.999999" min="0" max="1" optional="true"
            label="Gap filter threshold"
            help="-g/--allowed_gap_frac: gap filter threshold, filters positions which are gaps in &gt; allowed_gap_frac of the sequences [default: 0.999999]"/>
        <param name="remove_outliers" type="boolean" checked="false"
            truevalue="-r" falsevalue=""
            label="Remove outlier sequences?"
            help="-r/--remove_outliers: remove seqs very dissimilar to the alignment consensus (see --threshold). [default: False]"/>
        <param name="threshold" type="float" value="3.0" optional="true"
            label="Outlier threshold"
            help="-t/--threshold: with -r, remove seqs whose dissimilarity to the consensus sequence is approximately &gt; x standard deviations above the mean of the sequences [default: 3.0]"/>
        <!-- No default value: leaving this empty disables entropy filtering. -->
        <param name="entropy_threshold" type="float" min="0" max="1" optional="true"
            label="Entropy threshold"
            help="-e/--entropy_threshold: Percent threshold for removing base positions with the highest entropy, expressed as a fraction between 0 and 1. For example, if 0.10 were specified, the top 10% most entropic base positions would be filtered. If this value is used, any lane mask supplied will be ignored. Entropy filtering occurs after gap filtering. [default: None]"/>
    </inputs>

    <!--
        The output name is kept as-is for backward compatibility with saved
        workflows. Because it contains a dot it cannot be referenced as a
        Cheetah variable, so the file is collected through from_work_dir.
        The path matches the "input.fasta" symlink created in the command.
    -->
    <outputs>
        <data name="pfiltered.fasta" format="fasta"
            from_work_dir="filter_alignment_output/input_pfiltered.fasta"
            label="${tool.name} on ${on_string}: filtered alignment"/>
    </outputs>

    <!-- TODO: the single test below is an empty placeholder: it sets no
         parameters and checks no outputs, so it verifies nothing. Fill it in
         with an input alignment and an expected filtered output stored under
         test-data/. -->
    <tests>
        <test>
        </test>
    </tests>

    <help><![CDATA[
**What it does**

This script should be applied to generate a useful tree when aligning against a 
template alignment (e.g., with PyNAST). This script will remove positions which 
are gaps in every sequence (common for PyNAST, as typical sequences cover only 
200-400 bases, and they are being aligned against the full 16S gene). Additionally, 
the user can supply a lanemask file that defines which positions should be included 
when building the tree, and which should be ignored. Typically, this will differentiate 
between non-conserved positions, which are uninformative for tree building, and 
conserved positions which are informative for tree building. FILTERING ALIGNMENTS 
WHICH WERE BUILT WITH PYNAST AGAINST THE GREENGENES CORE SET ALIGNMENT SHOULD BE 
CONSIDERED AN ESSENTIAL STEP.

The output of filter_alignment.py consists of a single FASTA file, which ends with 
"pfiltered.fasta", where the "p" stands for positional 
filtering of the columns.
    ]]>
    </help>

    <!-- Citation entries come from the "citations" macro in macros.xml, so all
         wrappers importing that file share the same references. -->
    <citations>
        <expand macro="citations" />
    </citations>
</tool>