<!-- Mercurial > repos > bebatut > qiime -->

<tool id="qiime_assign_taxonomy" name="assign taxonomy" version="1.9.1galaxy1">

    <description>Assign taxonomy to each sequence</description>

    <macros>
        <import>macros.xml</import>
    </macros>

    <expand macro="requirements" />

    <command>
<![CDATA[
    ## Cheetah template that builds the assign_taxonomy.py command line.
    ## Dataset paths and free-text values are single-quoted so that spaces or
    ## shell metacharacters (e.g. the parentheses in the RTAX regexes) reach the
    ## script as a single literal argument.
    assign_taxonomy.py -i '$input_fasta_fp'

        ## Optional datasets render as the string 'None' when nothing is selected.
        #if str($id_to_taxonomy_fp) != 'None':
         -t '$id_to_taxonomy_fp'
        #end if

        #if str($reference_seqs_fp) != 'None':
         -r '$reference_seqs_fp'
        #end if

        ## Fall back to uclust when no method value is available; otherwise pass
        ## the selected method through. (The comparison previously used a single
        ## equals sign, which is not a valid condition.)
        #if str($methodcond.assignment_method) == 'None':
         -m uclust
        #else
         -m $methodcond.assignment_method
        #end if

        ## Method-specific options: only the branch matching the selected
        ## method contributes arguments.
        #if $methodcond.assignment_method == "rtax":

            #if $methodcond.single_ok:
                --single_ok
            #end if

            #if $methodcond.no_single_ok_generic:
                --no_single_ok_generic
            #end if

            #if str($methodcond.read_id_regex):
                --read_id_regex='$methodcond.read_id_regex'
            #end if

            #if str($methodcond.amplicon_id_regex):
                --amplicon_id_regex='$methodcond.amplicon_id_regex'
            #end if

            #if str($methodcond.header_id_regex):
                --header_id_regex='$methodcond.header_id_regex'
            #end if
        #end if

        #if $methodcond.assignment_method == "sortmerna":

            #if str($methodcond.sortmerna_db):
                --sortmerna_db='$methodcond.sortmerna_db'
            #end if

            #if $methodcond.sortmerna_e_value:
                --sortmerna_e_value=$methodcond.sortmerna_e_value
            #end if

            #if $methodcond.sortmerna_coverage:
                --sortmerna_coverage=$methodcond.sortmerna_coverage
            #end if

            #if $methodcond.sortmerna_best_N_alignments:
                --sortmerna_best_N_alignments=$methodcond.sortmerna_best_N_alignments
            #end if

            #if str($methodcond.sortmerna_threads):
                --sortmerna_threads='$methodcond.sortmerna_threads'
            #end if

            #if $methodcond.min_consensus_fraction:
                --min_consensus_fraction=$methodcond.min_consensus_fraction
            #end if

            #if $methodcond.similarity:
                --similarity=$methodcond.similarity
            #end if
        #end if

        #if $methodcond.assignment_method == "blast":

            ## NOTE(review): the selected blast_db dataset is only used as a
            ## switch here; the value handed to -b comes from the BLAST_DB_NAME
            ## shell variable, which this command never sets. Confirm that the
            ## job environment provides it.
            #if str($methodcond.blast_db) != 'None':
                -b \$BLAST_DB_NAME
            #end if

            #if $methodcond.blast_e_value:
                -e $methodcond.blast_e_value
            #end if
        #end if

        #if $methodcond.assignment_method == "rdp":

            #if $methodcond.confidence:
                -c $methodcond.confidence
            #end if

            #if $methodcond.rdp_max_memory:
                --rdp_max_memory=$methodcond.rdp_max_memory
            #end if
        #end if

        #if $methodcond.assignment_method == "mothur":

            #if $methodcond.confidence:
                -c $methodcond.confidence
            #end if
        #end if

        #if $methodcond.assignment_method == "uclust":

            #if $methodcond.min_consensus_fraction:
                --min_consensus_fraction=$methodcond.min_consensus_fraction
            #end if

            #if $methodcond.similarity:
                --similarity=$methodcond.similarity
            #end if

            #if $methodcond.uclust_max_accepts:
                --uclust_max_accepts=$methodcond.uclust_max_accepts
            #end if
        #end if

        ## Results land in this directory; the outputs section collects them from it.
         -o assign_taxonomy_output
]]>
    </command>

    <inputs>
        <!-- Tool form definition. Initial values are declared with `value`
             (and `checked` for booleans): Galaxy has no `default` attribute on
             params, so defaults written that way never reached the form. -->

        <!-- Query sequences to classify (passed as -i). -->
        <param name="input_fasta_fp" type="data" optional="False"
            label="-i/--input_fasta_fp: path to the input fasta file"/>

        <!-- Optional reference datasets. A dataset param cannot carry a file
             path as its initial value; when left unset the option is omitted
             from the command line. -->
        <param name="id_to_taxonomy_fp" type="data" optional="True"
            label="-t/--id_to_taxonomy_fp: Path to tab-delimited file mapping
            sequences to assigned taxonomy. Each assigned taxonomy is provided as
            a semicolon-separated list. For assignment with rdp, each assigned
            taxonomy must be exactly 6 levels deep. [default: /home12/caparmor/bioinfo/softs/sources/Qiime/data/gg_13_8_otus/taxonomy/99_otu_taxonomy.txt]"/>
        <param name="reference_seqs_fp" type="data" optional="True"
            label="-r/--reference_seqs_fp: Path to reference sequences.  For
            assignment with blast, these are used to generate a blast database.
            For assignment with rdp, they are used as training sequences for the
            classifier. [default: /home12/caparmor/bioinfo/softs/sources/Qiime/data/gg_13_8_otus/rep_set/99_otus.fasta]"/>

        <!-- The selected method decides which group of options is shown. -->
        <conditional name="methodcond">
            <param name="assignment_method" type="select" optional="False"
                label="-m/--assignment_method: Taxon assignment method, must be
                one of rdp, blast, rtax, mothur, uclust, sortmerna [default: uclust]">
                <option selected="True" value="uclust">uclust</option>
                <option value="rdp">rdp</option>
                <option value="blast">blast</option>
                <option value="rtax">rtax</option>
                <option value="mothur">mothur</option>
                <option value="sortmerna">sortmerna</option>
            </param>
            <when value="rtax">
                <param name="single_ok" type="boolean" checked="false"
                    label="--single_ok: When classifying paired ends, allow
                    fallback to single-ended classification when the mate pair is
                    lacking (used for RTAX only). [default: False]"/>
                <param name="no_single_ok_generic" type="boolean" checked="false"
                    label="--no_single_ok_generic: When classifying paired ends,
                    do not allow fallback to single-ended classification when the
                    mate pair is overly generic (used for RTAX only). [default: False]"/>
                <!-- The three regex fields are intentionally left empty: an
                     empty field omits the option, so the default documented in
                     each label is applied by the script itself. -->
                <param name="read_id_regex" type="text" optional="True"
                    label="--read_id_regex: Used to parse
                    the result of OTU clustering, to get the read_1_id for each
                    clusterID.  The clusterID itself is assumed to be the first
                    field, and is not captured by the regex.  (used for RTAX only).
                    [default: \S+\s+(\S+)]"/>
                <param name="amplicon_id_regex" type="text" optional="True"
                    label="--amplicon_id_regex: Used
                    to parse the result of split_libraries, to get the ampliconID
                    for each read_1_id.  Two groups capture read_1_id and ampliconID,
                    respectively.  (used for RTAX only). [default: (\S+)\s+(\S+?)\/]"/>
                <param name="header_id_regex" type="text" optional="True"
                    label="--header_id_regex: Used to
                    parse the result of split_libraries, to get the portion of the
                    header that RTAX uses to match mate pairs.  The default uses
                    the amplicon ID, not including /1 or /3, as the primary key
                    for the query sequences.  Typically this regex will be the
                    same as amplicon_id_regex, except that only the second group
                    is captured.  (used for RTAX only). [default: \S+\s+(\S+?)\/]"/>
            </when>
            <when value="sortmerna">
                <param name="sortmerna_db" type="text" optional="True"
                    label="--sortmerna_db: Pre-existing database to search
                    against when using sortmerna [default: None]"/>
                <param name="sortmerna_e_value" type="float" optional="True" value="1.0"
                    label="--sortmerna_e_value: Maximum E-value
                    when clustering [default = 1.0]"/>
                <param name="sortmerna_coverage" type="float" optional="True" value="0.9"
                    label="--sortmerna_coverage: Mininum percent
                    query coverage (of an alignment) to consider a hit, expressed
                    as a fraction between 0 and 1 [default: 0.9]"/>
                <param name="sortmerna_best_N_alignments" type="integer" optional="True" value="5"
                    label="--sortmerna_best_N_alignments: This option
                    specifies how many best alignments per read will be written
                    [default: 5]"/>
                <param name="sortmerna_threads" type="text" optional="True" value="1"
                    label="--sortmerna_threads: Specify number of
                    threads to be used for sortmerna mapper which utilizes multithreading.
                    [default: 1]"/>
                <param name="min_consensus_fraction" type="float" optional="True" value="0.51"
                    label="--min_consensus_fraction: Minimum
                    fraction of database hits that must have a specific taxonomic
                    assignment to assign that taxonomy to a query, only used for
                    sortmerna and uclust methods [default: 0.51]"/>
                <param name="similarity" type="float" optional="True" value="0.9"
                    label="--similarity: Minimum percent similarity
                    (expressed as a fraction between 0 and 1) to consider a database
                    match a hit, only used for sortmerna and uclust methods
                    [default: 0.9]"/>
            </when>
            <when value="blast">
                <param name="blast_db" type="data" optional="True"
                    label="-b/--blast_db: Database to blast against.  Must provide
                    either --blast_db or --reference_seqs_db for assignment with blast
                     [default: None]"/>
                <param name="blast_e_value" type="float" optional="True" value="0.001"
                    label="-e/--blast_e_value: Maximum e-value
                    to record an assignment, only used for blast method [default:
                    0.001]"/>
            </when>
            <when value="rdp">
                <param name="confidence" type="float" optional="True" value="0.5"
                    label="-c/--confidence: Minimum confidence to
                    record an assignment, only used for rdp and mothur methods
                    [default: 0.5]"/>
                <param name="rdp_max_memory" type="integer" optional="True" value="4000"
                    label="--rdp_max_memory: Maximum memory
                    allocation, in MB, for Java virtual machine when using the
                    rdp method.  Increase for large training sets [default: 4000]"/>
            </when>
            <when value="mothur">
                <param name="confidence" type="float" optional="True" value="0.5"
                    label="-c/--confidence: Minimum confidence to
                    record an assignment, only used for rdp and mothur methods
                    [default: 0.5]"/>
            </when>
            <when value="uclust">
                <param name="min_consensus_fraction" type="float" optional="True" value="0.51"
                    label="--min_consensus_fraction: Minimum
                    fraction of database hits that must have a specific taxonomic
                    assignment to assign that taxonomy to a query, only used for
                    sortmerna and uclust methods [default: 0.51]"/>
                <param name="similarity" type="float" optional="True" value="0.9"
                    label="--similarity: Minimum percent similarity
                    (expressed as a fraction between 0 and 1) to consider a database
                    match a hit, only used for sortmerna and uclust methods [default:
                    0.9]"/>
                <param name="uclust_max_accepts" type="integer" optional="True" value="3"
                    label="--uclust_max_accepts: Number of database
                    hits to consider when making an assignment, only used for uclust
                    method [default: 3]"/>
            </when>
        </conditional>
    </inputs>

    <outputs>
        <!-- Both datasets are collected from assign_taxonomy_output, the
             directory the command passes to -o. File names are matched by
             extension only.
             NOTE(review): each pattern presumably has to match exactly one
             file in that directory; confirm for every assignment method. -->
        <data
            name="tax_assignements.log"
            label="tax_assignements.log"
            format="txt"
            from_work_dir="assign_taxonomy_output/*.log"/>
        <data
            name="tax_assignements.txt"
            label="tax_assignements.txt"
            format="txt"
            from_work_dir="assign_taxonomy_output/*.txt"/>
    </outputs>

    <tests>
        <!-- Placeholder test case: it declares no inputs, parameters or
             expected outputs yet. -->
        <test/>
    </tests>

    <help><![CDATA[
**What it does**

Contains code for assigning taxonomy, using several techniques.

Given a set of sequences, this tool attempts to assign the taxonomy of each sequence.
Currently the methods implemented are assignment with BLAST, the RDP classifier,
RTAX, mothur, uclust, and sortmerna. The output of this step is an observation metadata
mapping file of input sequence identifiers (1st column of output file) to taxonomy
(2nd column) and quality score (3rd column). There may be method-specific information
in subsequent columns.

Reference data sets and id-to-taxonomy maps for 16S rRNA sequences can be found in
the Greengenes reference OTU builds. To get the latest build of the Greengenes OTUs
(and other marker gene OTU collections), follow the "Resources" link from http://qiime.org.
After downloading and unzipping you can use the following files as -r and -t, where
<otus_dir> is the name of the new directory after unzipping the reference OTUs tgz
file::

    -r <otus_dir>/rep_set/97_otus.fasta
    -t <otus_dir>/taxonomy/97_otu_taxonomy.txt

The consensus taxonomy assignment implemented here is the most detailed lineage
description shared by 90% or more of the sequences within the OTU (this level of
agreement can be adjusted by the user). The full lineage information for each
sequence is one of the output files of the analysis. In addition, a conflict file
records cases in which a phylum-level taxonomy assignment disagreement exists
within an OTU (such instances are rare and can reflect sequence misclassification
within the greengenes database).
    ]]>
    </help>

    <!-- Citation entries come from the "citations" macro; presumably it is
         defined in the imported macros.xml (verify there). -->
    <citations>
        <expand macro="citations" />
    </citations>
</tool>
<!--
author	bebatut
date	Tue, 02 Feb 2016 05:50:37 -0500
parents
children
-->