view assign_taxonomy.xml @ 0:c1bd0c560018 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime commit bcbe76277f3e60303faf826f8ce7f018bc663a9a-dirty
author bebatut
date Tue, 02 Feb 2016 05:50:37 -0500
parents
children
line wrap: on
line source

<tool id="qiime_assign_taxonomy" name="assign taxonomy" version="1.9.1galaxy1">

    <!-- One-line summary displayed next to the tool name. -->
    <description>Assign taxonomy to each sequence</description>

    <!--
        macros.xml is the only macro file imported here; the "requirements"
        macro expanded below and the "citations" macro expanded at the end of
        the tool are expected to be defined there.
    -->
    <macros>
        <import>macros.xml</import>
    </macros>

    <expand macro="requirements"/>

    <command>
<![CDATA[
    ## Build the assign_taxonomy.py command line from the tool form.
    ## Dataset paths and free-text values are single-quoted so that spaces or
    ## regex metacharacters cannot be interpreted by the shell.
    assign_taxonomy.py -i '$input_fasta_fp'

        ## Optional datasets: an unset data parameter renders as the string None.
        #if str($id_to_taxonomy_fp) != 'None':
            -t '$id_to_taxonomy_fp'
        #end if

        #if str($reference_seqs_fp) != 'None':
            -r '$reference_seqs_fp'
        #end if

        ## assignment_method is a mandatory select and therefore always has a
        ## value, so no fallback branch is needed.
        -m '$methodcond.assignment_method'

        ## Method specific options. Numeric options are compared as strings so
        ## that an explicit 0 is still passed on; only an empty field is skipped.
        #if str($methodcond.assignment_method) == 'rtax':

            #if $methodcond.single_ok:
                --single_ok
            #end if

            #if $methodcond.no_single_ok_generic:
                --no_single_ok_generic
            #end if

            #if str($methodcond.read_id_regex) not in ('', 'None'):
                --read_id_regex='$methodcond.read_id_regex'
            #end if

            #if str($methodcond.amplicon_id_regex) not in ('', 'None'):
                --amplicon_id_regex='$methodcond.amplicon_id_regex'
            #end if

            #if str($methodcond.header_id_regex) not in ('', 'None'):
                --header_id_regex='$methodcond.header_id_regex'
            #end if

        #elif str($methodcond.assignment_method) == 'sortmerna':

            #if str($methodcond.sortmerna_db) not in ('', 'None'):
                --sortmerna_db='$methodcond.sortmerna_db'
            #end if

            #if str($methodcond.sortmerna_e_value) not in ('', 'None'):
                --sortmerna_e_value=$methodcond.sortmerna_e_value
            #end if

            #if str($methodcond.sortmerna_coverage) not in ('', 'None'):
                --sortmerna_coverage=$methodcond.sortmerna_coverage
            #end if

            #if str($methodcond.sortmerna_best_N_alignments) not in ('', 'None'):
                --sortmerna_best_N_alignments=$methodcond.sortmerna_best_N_alignments
            #end if

            #if str($methodcond.sortmerna_threads) not in ('', 'None'):
                --sortmerna_threads='$methodcond.sortmerna_threads'
            #end if

            #if str($methodcond.min_consensus_fraction) not in ('', 'None'):
                --min_consensus_fraction=$methodcond.min_consensus_fraction
            #end if

            #if str($methodcond.similarity) not in ('', 'None'):
                --similarity=$methodcond.similarity
            #end if

        #elif str($methodcond.assignment_method) == 'blast':

            ## Pass the selected dataset itself; the previous template tested
            ## blast_db but then handed over an unrelated shell variable.
            #if str($methodcond.blast_db) != 'None':
                -b '$methodcond.blast_db'
            #end if

            #if str($methodcond.blast_e_value) not in ('', 'None'):
                -e $methodcond.blast_e_value
            #end if

        #elif str($methodcond.assignment_method) == 'rdp':

            #if str($methodcond.confidence) not in ('', 'None'):
                -c $methodcond.confidence
            #end if

            #if str($methodcond.rdp_max_memory) not in ('', 'None'):
                --rdp_max_memory=$methodcond.rdp_max_memory
            #end if

        #elif str($methodcond.assignment_method) == 'mothur':

            #if str($methodcond.confidence) not in ('', 'None'):
                -c $methodcond.confidence
            #end if

        #elif str($methodcond.assignment_method) == 'uclust':

            #if str($methodcond.min_consensus_fraction) not in ('', 'None'):
                --min_consensus_fraction=$methodcond.min_consensus_fraction
            #end if

            #if str($methodcond.similarity) not in ('', 'None'):
                --similarity=$methodcond.similarity
            #end if

            #if str($methodcond.uclust_max_accepts) not in ('', 'None'):
                --uclust_max_accepts=$methodcond.uclust_max_accepts
            #end if
        #end if

        ## Output directory; the declared outputs are collected from it.
        -o assign_taxonomy_output
]]>
    </command>

    <!--
        Tool form. Initial values are declared with the "value" attribute
        (booleans with "checked"); Galaxy does not read a "default" attribute
        on param, so the defaults quoted in the labels were never pre-filled.
    -->
    <inputs>
        <param label="-i/--input_fasta_fp: path to the input fasta file" 
            name="input_fasta_fp" optional="False" type="data"/>
        <!--
            Optional reference datasets. When nothing is selected the option is
            not passed and assign_taxonomy.py falls back to its own default.
        -->
        <param 
            label="-t/--id_to_taxonomy_fp: Path to tab-delimited file mapping 
            sequences to assigned taxonomy. Each assigned taxonomy is provided as 
            a semicolon-separated list. For assignment with rdp, each assigned 
            taxonomy must be exactly 6 levels deep. [default: the taxonomy map configured in QIIME]" 
            name="id_to_taxonomy_fp" optional="True" type="data"/>
        <param 
            label="-r/--reference_seqs_fp: Path to reference sequences.  For 
            assignment with blast, these are used to generate a blast database. 
            For assignment with rdp, they are used as training sequences for the 
            classifier. [default: the reference sequences configured in QIIME]" 
            name="reference_seqs_fp" optional="True" type="data"/>

        <!-- Each "when" branch exposes only the options of the selected method. -->
        <conditional name="methodcond">
            <param label="-m/--assignment_method: Taxon assignment method, must be 
                one of rdp, blast, rtax, mothur, uclust, sortmerna [default: uclust]" 
                name="assignment_method" optional="False" type="select">
                <option selected="True" value="uclust">uclust</option>
                <option value="rdp">rdp</option>
                <option value="blast">blast</option>
                <option value="rtax">rtax</option>
                <option value="mothur">mothur</option>
                <option value="sortmerna">sortmerna</option>
            </param>
            <when value="rtax">
                <param label="--single_ok: When classifying paired ends, allow 
                    fallback to single-ended classification when the mate pair is 
                    lacking (used for RTAX only). [default: False]" name="single_ok" 
                    checked="false" type="boolean"/>
                <param label="--no_single_ok_generic: When classifying paired ends, 
                    do not allow fallback to single-ended classification when the 
                    mate pair is overly generic (used for RTAX only). [default: False]" 
                    name="no_single_ok_generic" checked="false" type="boolean"/>
                <!--
                    The three regex fields start empty on purpose: an empty field
                    is not passed on the command line, so assign_taxonomy.py uses
                    the default pattern quoted in the label.
                -->
                <param label="--read_id_regex: Used to parse 
                    the result of OTU clustering, to get the read_1_id for each 
                    clusterID.  The clusterID itself is assumed to be the first 
                    field, and is not captured by the regex.  (used for RTAX only). 
                    [default: \S+\s+(\S+)]" name="read_id_regex" optional="True" 
                    type="text"/>
                <param label="--amplicon_id_regex: Used 
                    to parse the result of split_libraries, to get the ampliconID 
                    for each read_1_id.  Two groups capture read_1_id and ampliconID, 
                    respectively.  (used for RTAX only). [default: (\S+)\s+(\S+?)\/]" 
                    name="amplicon_id_regex" optional="True" type="text"/>
                <param label="--header_id_regex: Used to 
                    parse the result of split_libraries, to get the portion of the 
                    header that RTAX uses to match mate pairs.  The default uses 
                    the amplicon ID, not including /1 or /3, as the primary key 
                    for the query sequences.  Typically this regex will be the 
                    same as amplicon_id_regex, except that only the second group 
                    is captured.  (used for RTAX only). [default: \S+\s+(\S+?)\/]" 
                    name="header_id_regex" optional="True" type="text"/>
            </when>
            <when value="sortmerna">
                <param label="--sortmerna_db: Pre-existing database to search 
                    against when using sortmerna [default: None]" name="sortmerna_db" 
                    optional="True" type="text"/>
                <param value="1.0" label="--sortmerna_e_value: Maximum E-value 
                    when clustering [default = 1.0]" name="sortmerna_e_value" 
                    optional="True" type="float"/>
                <param value="0.9" label="--sortmerna_coverage: Minimum percent 
                    query coverage (of an alignment) to consider a hit, expressed 
                    as a fraction between 0 and 1 [default: 0.9]" 
                    name="sortmerna_coverage" optional="True" type="float"/>
                <param value="5" label="--sortmerna_best_N_alignments: This option 
                    specifies how many best alignments per read will be written 
                    [default: 5]" name="sortmerna_best_N_alignments" optional="True" 
                    type="integer"/>
                <!-- A thread count is an integer; typing it lets Galaxy validate the entry. -->
                <param value="1" label="--sortmerna_threads: Specify number of 
                    threads to be used for sortmerna mapper which utilizes multithreading. 
                    [default: 1]" name="sortmerna_threads" optional="True" 
                    type="integer"/>
                <param value="0.51" label="--min_consensus_fraction: Minimum 
                    fraction of database hits that must have a specific taxonomic 
                    assignment to assign that taxonomy to a query, only used for 
                    sortmerna and uclust methods [default: 0.51]" 
                    name="min_consensus_fraction" optional="True" type="float"/>
                <param value="0.9" label="--similarity: Minimum percent similarity 
                    (expressed as a fraction between 0 and 1) to consider a database 
                    match a hit, only used for sortmerna and uclust methods 
                    [default: 0.9]" name="similarity" optional="True" type="float"/>
            </when>
            <when value="blast">
                <param label="-b/--blast_db: Database to blast against.  Must provide 
                    either --blast_db or --reference_seqs_db for assignment with blast
                     [default: None]" name="blast_db" optional="True" type="data"/>
                <param value="0.001" label="-e/--blast_e_value: Maximum e-value 
                    to record an assignment, only used for blast method [default: 
                    0.001]" name="blast_e_value" optional="True" type="float"/>
            </when>
            <when value="rdp">
                <param value="0.5" label="-c/--confidence: Minimum confidence to 
                    record an assignment, only used for rdp and mothur methods 
                    [default: 0.5]" name="confidence" optional="True" type="float"/>
                <param value="4000" label="--rdp_max_memory: Maximum memory 
                    allocation, in MB, for Java virtual machine when using the 
                    rdp method.  Increase for large training sets [default: 4000]" 
                    name="rdp_max_memory" optional="True" type="integer"/>
            </when>
            <when value="mothur">
                <param value="0.5" label="-c/--confidence: Minimum confidence to 
                    record an assignment, only used for rdp and mothur methods 
                    [default: 0.5]" name="confidence" optional="True" type="float"/>
            </when>
            <when value="uclust">
                <param value="0.51" label="--min_consensus_fraction: Minimum 
                    fraction of database hits that must have a specific taxonomic 
                    assignment to assign that taxonomy to a query, only used for 
                    sortmerna and uclust methods [default: 0.51]" name="min_consensus_fraction" 
                    optional="True" type="float"/>
                <param value="0.9" label="--similarity: Minimum percent similarity 
                    (expressed as a fraction between 0 and 1) to consider a database 
                    match a hit, only used for sortmerna and uclust methods [default: 
                    0.9]" name="similarity" optional="True" type="float"/>
                <param value="3" label="--uclust_max_accepts: Number of database 
                    hits to consider when making an assignment, only used for uclust 
                    method [default: 3]" name="uclust_max_accepts" optional="True" 
                    type="integer"/>
            </when>
        </conditional>
    </inputs>
    
    <!--
        Two text datasets picked up from the assign_taxonomy_output directory
        that the command passes to -o: the log file and the assignment table.
        NOTE(review): both from_work_dir values contain a wildcard; confirm that
        the target Galaxy version resolves patterns here rather than expecting
        a literal relative path.
    -->
    <outputs>
        <data name="tax_assignements.log" label="tax_assignements.log"
            format="txt" from_work_dir="assign_taxonomy_output/*.log"/>
        <data name="tax_assignements.txt" label="tax_assignements.txt"
            format="txt" from_work_dir="assign_taxonomy_output/*.txt"/>
    </outputs>

    <!-- Placeholder: one test case that declares no inputs and no assertions. -->
    <tests>
        <test/>
    </tests>

    <help><![CDATA[
**What it does**

Contains code for assigning taxonomy, using several techniques.

Given a set of sequences, assign_taxonomy.py attempts to assign the taxonomy of each sequence. 
Currently the methods implemented are assignment with BLAST, the RDP classifier, 
RTAX, mothur, uclust, and sortmerna. The output of this step is an observation metadata 
mapping file of input sequence identifiers (1st column of output file) to taxonomy 
(2nd column) and quality score (3rd column). There may be method-specific information 
in subsequent columns.

Reference data sets and id-to-taxonomy maps for 16S rRNA sequences can be found in 
the Greengenes reference OTU builds. To get the latest build of the Greengenes OTUs 
(and other marker gene OTU collections), follow the "Resources" link from http://qiime.org. 
After downloading and unzipping you can use the following files as -r and -t, where 
<otus_dir> is the name of the new directory after unzipping the reference OTUs tgz 
file::

    -r <otus_dir>/rep_set/97_otus.fasta
    -t <otus_dir>/taxonomy/97_otu_taxonomy.txt

The consensus taxonomy assignment implemented here is the most detailed lineage 
description shared by 90% or more of the sequences within the OTU (this level of 
agreement can be adjusted by the user). The full lineage information for each 
sequence is one of the output files of the analysis. In addition, a conflict file 
records cases in which a phylum-level taxonomy assignment disagreement exists 
within an OTU (such instances are rare and can reflect sequence misclassification 
within the greengenes database).
    ]]>
    </help>

    <!-- Citation entries come from the shared "citations" macro. -->
    <citations>
        <expand macro="citations"/>
    </citations>
</tool>