Mercurial > repos > bebatut > qiime

diff assign_taxonomy.xml @ 0:c1bd0c560018 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime commit bcbe76277f3e60303faf826f8ce7f018bc663a9a-dirty
author: bebatut
date: Tue, 02 Feb 2016 05:50:37 -0500
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/assign_taxonomy.xml	Tue Feb 02 05:50:37 2016 -0500
@@ -0,0 +1,304 @@
+<tool id="qiime_assign_taxonomy" name="assign taxonomy" version="1.9.1galaxy1">
+
+    <description>Assign taxonomy to each sequence</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="requirements" />
+
+    <command>
+<![CDATA[   
+    assign_taxonomy.py -i $input_fasta_fp
+
+        #if str($id_to_taxonomy_fp) != 'None':
+         -t $id_to_taxonomy_fp
+        #end if
+
+        #if str($reference_seqs_fp) != 'None':
+         -r $reference_seqs_fp
+        #end if
+
+        #if str($methodcond.assignment_method) = 'None':
+         -m uclust
+        #end if
+
+        #if str($methodcond.assignment_method) != 'None':
+         -m $methodcond.assignment_method
+        #end if
+
+        #if $methodcond.assignment_method == "rtax":
+
+            #if $methodcond.single_ok:
+                --single_ok
+            #end if
+
+            #if $methodcond.no_single_ok_generic:
+                --no_single_ok_generic
+            #end if
+
+            #if str($methodcond.read_id_regex):
+                --read_id_regex=$methodcond.read_id_regex
+            #end if
+
+            #if str($methodcond.amplicon_id_regex):
+                --amplicon_id_regex=$methodcond.amplicon_id_regex
+            #end if
+
+            #if str($methodcond.header_id_regex):
+                --header_id_regex=$methodcond.header_id_regex
+            #end if
+        #end if
+
+        #if $methodcond.assignment_method == "sortmerna":
+
+            #if str($methodcond.sortmerna_db):
+                --sortmerna_db=$methodcond.sortmerna_db
+            #end if
+
+            #if $methodcond.sortmerna_e_value:
+                --sortmerna_e_value=$methodcond.sortmerna_e_value
+            #end if
+
+            #if $methodcond.sortmerna_coverage:
+                --sortmerna_coverage=$methodcond.sortmerna_coverage
+            #end if
+
+            #if $methodcond.sortmerna_best_N_alignments:
+                --sortmerna_best_N_alignments=$methodcond.sortmerna_best_N_alignments
+            #end if
+
+            #if str($methodcond.sortmerna_threads):
+                --sortmerna_threads=$methodcond.sortmerna_threads
+            #end if
+
+            #if $methodcond.min_consensus_fraction:
+                --min_consensus_fraction=$methodcond.min_consensus_fraction
+            #end if
+
+            #if $methodcond.similarity:
+                --similarity=$methodcond.similarity
+            #end if
+        #end if
+
+        #if $methodcond.assignment_method == "blast":
+
+            #if str($methodcond.blast_db) != 'None':
+                -b \$BLAST_DB_NAME
+            #end if
+
+            #if $methodcond.blast_e_value:
+                -e $methodcond.blast_e_value
+            #end if
+        #end if
+
+        #if $methodcond.assignment_method == "rdp":
+
+            #if $methodcond.confidence:
+                -c $methodcond.confidence
+            #end if
+
+            #if $methodcond.rdp_max_memory:
+                --rdp_max_memory=$methodcond.rdp_max_memory
+            #end if
+        #end if
+
+        #if $methodcond.assignment_method == "mothur":
+
+            #if $methodcond.confidence:
+                -c $methodcond.confidence
+            #end if
+        #end if
+
+        #if $methodcond.assignment_method == "uclust":
+
+            #if $methodcond.min_consensus_fraction:
+                --min_consensus_fraction=$methodcond.min_consensus_fraction
+            #end if
+
+            #if $methodcond.similarity:
+                --similarity=$methodcond.similarity
+            #end if
+
+            #if $methodcond.uclust_max_accepts:
+                --uclust_max_accepts=$methodcond.uclust_max_accepts
+            #end if
+        #end if
+         -o assign_taxonomy_output
+]]>
+    </command>
+
+    <inputs>
+        <param label="-i/--input_fasta_fp: path to the input fasta file" 
+            name="input_fasta_fp" optional="False" type="data"/>
+        <param default="/home12/caparmor/bioinfo/softs/sources/Qiime/data/gg_13_8_otus/taxonomy/99_otu_taxonomy.txt" 
+            label="-t/--id_to_taxonomy_fp: Path to tab-delimited file mapping 
+            sequences to assigned taxonomy. Each assigned taxonomy is provided as 
+            a semicolon-separated list. For assignment with rdp, each assigned 
+            taxonomy must be exactly 6 levels deep. [default: /home12/caparmor/bioinfo/softs/sources/Qiime/data/gg_13_8_otus/taxonomy/99_otu_taxonomy.txt]" 
+            name="id_to_taxonomy_fp" optional="True" type="data"/>
+        <param default="/home12/caparmor/bioinfo/softs/sources/Qiime/data/gg_13_8_otus/rep_set/99_otus.fasta" 
+            label="-r/--reference_seqs_fp: Path to reference sequences.  For 
+            assignment with blast, these are used to generate a blast database. 
+            For assignment with rdp, they are used as training sequences for the 
+            classifier. [default: /home12/caparmor/bioinfo/softs/sources/Qiime/data/gg_13_8_otus/rep_set/99_otus.fasta]" 
+            name="reference_seqs_fp" optional="True" type="data"/>
+
+        <conditional name="methodcond">
+            <param label="-m/--assignment_method: Taxon assignment method, must be 
+                one of rdp, blast, rtax, mothur, uclust, sortmerna [default: uclust]" 
+                name="assignment_method" optional="False" type="select">
+                <option selected="True" value="uclust">uclust</option>
+                <option value="rdp">rdp</option>
+                <option value="blast">blast</option>
+                <option value="rtax">rtax</option>
+                <option value="mothur">mothur</option>
+                <option value="sortmerna">sortmerna</option>
+            </param>
+            <when value="rtax">
+                <param label="--single_ok: When classifying paired ends, allow 
+                    fallback to single-ended classification when the mate pair is 
+                    lacking (used for RTAX only). [default: False]" name="single_ok" 
+                    selected="False" type="boolean"/>
+                <param label="--no_single_ok_generic: When classifying paired ends, 
+                    do not allow fallback to single-ended classification when the 
+                    mate pair is overly generic (used for RTAX only). [default: False]" 
+                    name="no_single_ok_generic" selected="False" type="boolean"/>
+                <param default="\S+\s+(\S+)" label="--read_id_regex: Used to parse 
+                    the result of OTU clustering, to get the read_1_id for each 
+                    clusterID.  The clusterID itself is assumed to be the first 
+                    field, and is not captured by the regex.  (used for RTAX only). 
+                    [default: \S+\s+(\S+)]" name="read_id_regex" optional="True" 
+                    type="text"/>
+                <param default="(\S+)\s+(\S+?)\/" label="--amplicon_id_regex: Used 
+                    to parse the result of split_libraries, to get the ampliconID 
+                    for each read_1_id.  Two groups capture read_1_id and ampliconID, 
+                    respectively.  (used for RTAX only). [default: (\S+)\s+(\S+?)\/]" 
+                    name="amplicon_id_regex" optional="True" type="text"/>
+                <param default="\S+\s+(\S+?)\/" label="--header_id_regex: Used to 
+                    parse the result of split_libraries, to get the portion of the 
+                    header that RTAX uses to match mate pairs.  The default uses 
+                    the amplicon ID, not including /1 or /3, as the primary key 
+                    for the query sequences.  Typically this regex will be the 
+                    same as amplicon_id_regex, except that only the second group 
+                    is captured.  (used for RTAX only). [default: \S+\s+(\S+?)\/]" 
+                    name="header_id_regex" optional="True" type="text"/>
+            </when>
+            <when value="sortmerna">
+                <param label="--sortmerna_db: Pre-existing database to search 
+                    against when using sortmerna [default: None]" name="sortmerna_db" 
+                    optional="True" type="text"/>
+                <param default="1.0" label="--sortmerna_e_value: Maximum E-value 
+                    when clustering [default = 1.0]" name="sortmerna_e_value" 
+                    optional="True" type="float"/>
+                <param default="0.9" label="--sortmerna_coverage: Mininum percent 
+                    query coverage (of an alignment) to consider a hit, expressed 
+                    as a fraction between 0 and 1 [default: 0.9]" 
+                    name="sortmerna_coverage" optional="True" type="float"/>
+                <param default="5" label="--sortmerna_best_N_alignments: This option 
+                    specifies how many best alignments per read will be written 
+                    [default: 5]" name="sortmerna_best_N_alignments" optional="True" 
+                    type="integer"/>
+                <param default="1" label="--sortmerna_threads: Specify number of 
+                    threads to be used for sortmerna mapper which utilizes multithreading. 
+                    [default: 1]" name="sortmerna_threads" optional="True" 
+                    type="text"/>
+                <param default="0.51" label="--min_consensus_fraction: Minimum 
+                    fraction of database hits that must have a specific taxonomic 
+                    assignment to assign that taxonomy to a query, only used for 
+                    sortmerna and uclust methods [default: 0.51]" 
+                    name="min_consensus_fraction" optional="True" type="float"/>
+                <param default="0.9" label="--similarity: Minimum percent similarity 
+                    (expressed as a fraction between 0 and 1) to consider a database 
+                    match a hit, only used for sortmerna and uclust methods 
+                    [default: 0.9]" name="similarity" optional="True" type="float"/>
+            </when>
+            <when value="blast">
+                <param label="-b/--blast_db: Database to blast against.  Must provide 
+                    either --blast_db or --reference_seqs_db for assignment with blast
+                     [default: None]" name="blast_db" optional="True" type="data"/>
+                <param default="0.001" label="-e/--blast_e_value: Maximum e-value 
+                    to record an assignment, only used for blast method [default: 
+                    0.001]" name="blast_e_value" optional="True" type="float"/>
+            </when>
+            <when value="rdp">
+                <param default="0.5" label="-c/--confidence: Minimum confidence to 
+                    record an assignment, only used for rdp and mothur methods 
+                    [default: 0.5]" name="confidence" optional="True" type="float"/>
+                <param default="4000" label="--rdp_max_memory: Maximum memory 
+                    allocation, in MB, for Java virtual machine when using the 
+                    rdp method.  Increase for large training sets [default: 4000]" 
+                    name="rdp_max_memory" optional="True" type="integer"/>
+            </when>
+            <when value="mothur">
+                <param default="0.5" label="-c/--confidence: Minimum confidence to 
+                    record an assignment, only used for rdp and mothur methods 
+                    [default: 0.5]" name="confidence" optional="True" type="float"/>
+            </when>
+            <when value="uclust">
+                <param default="0.51" label="--min_consensus_fraction: Minimum 
+                    fraction of database hits that must have a specific taxonomic 
+                    assignment to assign that taxonomy to a query, only used for 
+                    sortmerna and uclust methods [default: 0.51]" name="min_consensus_fraction" 
+                    optional="True" type="float"/>
+                <param default="0.9" label="--similarity: Minimum percent similarity 
+                    (expressed as a fraction between 0 and 1) to consider a database 
+                    match a hit, only used for sortmerna and uclust methods [default: 
+                    0.9]" name="similarity" optional="True" type="float"/>
+                <param default="3" label="--uclust_max_accepts: Number of database 
+                    hits to consider when making an assignment, only used for uclust 
+                    method [default: 3]" name="uclust_max_accepts" optional="True" 
+                    type="integer"/>
+            </when>
+        </conditional>
+    </inputs>
+    
+    <outputs>
+        <data format="txt" from_work_dir="assign_taxonomy_output/*.log" 
+            label="tax_assignements.log" name="tax_assignements.log"/>
+        <data format="txt" from_work_dir="assign_taxonomy_output/*.txt" 
+            label="tax_assignements.txt" name="tax_assignements.txt"/>
+    </outputs>
+
+    <tests>
+        <test>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+Contains code for assigning taxonomy, using several techniques.
+
+Given a set of sequences, %prog attempts to assign the taxonomy of each sequence. 
+Currently the methods implemented are assignment with BLAST, the RDP classifier, 
+RTAX, mothur, and uclust. The output of this step is an observation metadata 
+mapping file of input sequence identifiers (1st column of output file) to taxonomy 
+(2nd column) and quality score (3rd column). There may be method-specific information 
+in subsequent columns.
+
+Reference data sets and id-to-taxonomy maps for 16S rRNA sequences can be found in 
+the Greengenes reference OTU builds. To get the latest build of the Greengenes OTUs 
+(and other marker gene OTU collections), follow the "Resources" link from http://qiime.org. 
+After downloading and unzipping you can use the following files as -r and -t, where 
+<otus_dir> is the name of the new directory after unzipping the reference OTUs tgz 
+    file.
+
+-r <otus_dir>/rep_set/97_otus.fasta
+-t <otus_dir></otus_dir>/taxonomy/97_otu_taxonomy.txt
+
+The consensus taxonomy assignment implemented here is the most detailed lineage 
+description shared by 90% or more of the sequences within the OTU (this level of 
+agreement can be adjusted by the user). The full lineage information for each 
+sequence is one of the output files of the analysis. In addition, a conflict file 
+records cases in which a phylum-level taxonomy assignment disagreement exists 
+within an OTU (such instances are rare and can reflect sequence misclassification 
+within the greengenes database).
+    ]]>
+    </help>
+
+    <citations>
+        <expand macro="citations" />
+    </citations>
+</tool>
author	bebatut
date	Tue, 02 Feb 2016 05:50:37 -0500
parents
children