Mercurial > repos > bebatut > qiime
view assign_taxonomy.xml @ 0:c1bd0c560018 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime commit bcbe76277f3e60303faf826f8ce7f018bc663a9a-dirty
author | bebatut |
---|---|
date | Tue, 02 Feb 2016 05:50:37 -0500 |
parents | |
children |
line wrap: on
line source
<tool id="qiime_assign_taxonomy" name="assign taxonomy" version="1.9.1galaxy1">
    <!--
        Galaxy wrapper around QIIME's assign_taxonomy.py.

        Inputs : a FASTA dataset, optional reference sequences and
                 id-to-taxonomy map, and a per-method group of options
                 (conditional "methodcond").
        Outputs: the assignment table and the log that the script writes
                 into the working sub-directory "assign_taxonomy_output".
        The "requirements" and "citations" macros are defined in macros.xml,
        which is not part of this file.
    -->
    <description>Assign taxonomy to each sequence</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command>
<![CDATA[
assign_taxonomy.py
    -i '$input_fasta_fp'

    ## Optional datasets render as the string 'None' when nothing is selected;
    ## in that case the option is omitted and the script falls back to its own default.
    #if str($id_to_taxonomy_fp) != 'None':
        -t '$id_to_taxonomy_fp'
    #end if
    #if str($reference_seqs_fp) != 'None':
        -r '$reference_seqs_fp'
    #end if

    ## The method select is mandatory and preselects uclust, so it always has a value.
    -m '$methodcond.assignment_method'

    ## Numeric options are compared against the empty string (not tested for
    ## truthiness) so that an explicit 0 is still forwarded to the script.
    #if str($methodcond.assignment_method) == 'rtax':
        #if $methodcond.single_ok:
            --single_ok
        #end if
        #if $methodcond.no_single_ok_generic:
            --no_single_ok_generic
        #end if
        ## Regular expressions are single-quoted so the shell passes them verbatim;
        ## the sanitizer on these parameters strips single quotes from the value.
        #if str($methodcond.read_id_regex) != '':
            --read_id_regex='$methodcond.read_id_regex'
        #end if
        #if str($methodcond.amplicon_id_regex) != '':
            --amplicon_id_regex='$methodcond.amplicon_id_regex'
        #end if
        #if str($methodcond.header_id_regex) != '':
            --header_id_regex='$methodcond.header_id_regex'
        #end if
    #elif str($methodcond.assignment_method) == 'sortmerna':
        #if str($methodcond.sortmerna_db) != '':
            --sortmerna_db='$methodcond.sortmerna_db'
        #end if
        #if str($methodcond.sortmerna_e_value) != '':
            --sortmerna_e_value=$methodcond.sortmerna_e_value
        #end if
        #if str($methodcond.sortmerna_coverage) != '':
            --sortmerna_coverage=$methodcond.sortmerna_coverage
        #end if
        #if str($methodcond.sortmerna_best_N_alignments) != '':
            --sortmerna_best_N_alignments=$methodcond.sortmerna_best_N_alignments
        #end if
        #if str($methodcond.sortmerna_threads) != '':
            --sortmerna_threads=$methodcond.sortmerna_threads
        #end if
        #if str($methodcond.min_consensus_fraction) != '':
            --min_consensus_fraction=$methodcond.min_consensus_fraction
        #end if
        #if str($methodcond.similarity) != '':
            --similarity=$methodcond.similarity
        #end if
    #elif str($methodcond.assignment_method) == 'blast':
        ## NOTE(review): the previous revision passed the shell variable BLAST_DB_NAME,
        ## which nothing in this file defines; the selected dataset is passed instead.
        ## Confirm that macros.xml does not rely on that variable.
        #if str($methodcond.blast_db) != 'None':
            -b '$methodcond.blast_db'
        #end if
        #if str($methodcond.blast_e_value) != '':
            -e $methodcond.blast_e_value
        #end if
    #elif str($methodcond.assignment_method) == 'rdp':
        #if str($methodcond.confidence) != '':
            -c $methodcond.confidence
        #end if
        #if str($methodcond.rdp_max_memory) != '':
            --rdp_max_memory=$methodcond.rdp_max_memory
        #end if
    #elif str($methodcond.assignment_method) == 'mothur':
        #if str($methodcond.confidence) != '':
            -c $methodcond.confidence
        #end if
    #elif str($methodcond.assignment_method) == 'uclust':
        #if str($methodcond.min_consensus_fraction) != '':
            --min_consensus_fraction=$methodcond.min_consensus_fraction
        #end if
        #if str($methodcond.similarity) != '':
            --similarity=$methodcond.similarity
        #end if
        #if str($methodcond.uclust_max_accepts) != '':
            --uclust_max_accepts=$methodcond.uclust_max_accepts
        #end if
    #end if

    -o assign_taxonomy_output
]]>
    </command>
    <inputs>
        <param name="input_fasta_fp" type="data" optional="false" label="-i/--input_fasta_fp: path to the input fasta file"/>
        <!-- Both reference inputs are optional; when left empty the option is not passed to the script. -->
        <param name="id_to_taxonomy_fp" type="data" optional="true" label="-t/--id_to_taxonomy_fp: Path to tab-delimited file mapping sequences to assigned taxonomy. Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep. [default: the taxonomy map configured in the QIIME installation]"/>
        <param name="reference_seqs_fp" type="data" optional="true" label="-r/--reference_seqs_fp: Path to reference sequences. For assignment with blast, these are used to generate a blast database. For assignment with rdp, they are used as training sequences for the classifier. [default: the reference sequences configured in the QIIME installation]"/>
        <!-- One "when" branch per assignment method; each branch exposes only the options that method uses. -->
        <conditional name="methodcond">
            <param name="assignment_method" type="select" optional="false" label="-m/--assignment_method: Taxon assignment method, must be one of rdp, blast, rtax, mothur, uclust, sortmerna [default: uclust]">
                <option value="uclust" selected="true">uclust</option>
                <option value="rdp">rdp</option>
                <option value="blast">blast</option>
                <option value="rtax">rtax</option>
                <option value="mothur">mothur</option>
                <option value="sortmerna">sortmerna</option>
            </param>
            <when value="rtax">
                <param name="single_ok" type="boolean" checked="false" label="--single_ok: When classifying paired ends, allow fallback to single-ended classification when the mate pair is lacking (used for RTAX only). [default: False]"/>
                <param name="no_single_ok_generic" type="boolean" checked="false" label="--no_single_ok_generic: When classifying paired ends, do not allow fallback to single-ended classification when the mate pair is overly generic (used for RTAX only). [default: False]"/>
                <!-- The sanitizers below keep every printable character except the single quote,
                     because the command template wraps these values in single quotes. -->
                <param name="read_id_regex" type="text" optional="true" value="\S+\s+(\S+)" label="--read_id_regex: Used to parse the result of OTU clustering, to get the read_1_id for each clusterID. The clusterID itself is assumed to be the first field, and is not captured by the regex. (used for RTAX only). [default: \S+\s+(\S+)]">
                    <sanitizer invalid_char="">
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                        </valid>
                    </sanitizer>
                </param>
                <param name="amplicon_id_regex" type="text" optional="true" value="(\S+)\s+(\S+?)\/" label="--amplicon_id_regex: Used to parse the result of split_libraries, to get the ampliconID for each read_1_id. Two groups capture read_1_id and ampliconID, respectively. (used for RTAX only). [default: (\S+)\s+(\S+?)\/]">
                    <sanitizer invalid_char="">
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                        </valid>
                    </sanitizer>
                </param>
                <param name="header_id_regex" type="text" optional="true" value="\S+\s+(\S+?)\/" label="--header_id_regex: Used to parse the result of split_libraries, to get the portion of the header that RTAX uses to match mate pairs. The default uses the amplicon ID, not including /1 or /3, as the primary key for the query sequences. Typically this regex will be the same as amplicon_id_regex, except that only the second group is captured. (used for RTAX only). [default: \S+\s+(\S+?)\/]">
                    <sanitizer invalid_char="">
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                        </valid>
                    </sanitizer>
                </param>
            </when>
            <when value="sortmerna">
                <param name="sortmerna_db" type="text" optional="true" label="--sortmerna_db: Pre-existing database to search against when using sortmerna [default: None]"/>
                <param name="sortmerna_e_value" type="float" optional="true" value="1.0" label="--sortmerna_e_value: Maximum E-value when clustering [default = 1.0]"/>
                <param name="sortmerna_coverage" type="float" optional="true" value="0.9" label="--sortmerna_coverage: Minimum percent query coverage (of an alignment) to consider a hit, expressed as a fraction between 0 and 1 [default: 0.9]"/>
                <param name="sortmerna_best_N_alignments" type="integer" optional="true" value="5" label="--sortmerna_best_N_alignments: This option specifies how many best alignments per read will be written [default: 5]"/>
                <param name="sortmerna_threads" type="integer" optional="true" value="1" label="--sortmerna_threads: Specify number of threads to be used for sortmerna mapper which utilizes multithreading. [default: 1]"/>
                <param name="min_consensus_fraction" type="float" optional="true" value="0.51" label="--min_consensus_fraction: Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query, only used for sortmerna and uclust methods [default: 0.51]"/>
                <param name="similarity" type="float" optional="true" value="0.9" label="--similarity: Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit, only used for sortmerna and uclust methods [default: 0.9]"/>
            </when>
            <when value="blast">
                <param name="blast_db" type="data" optional="true" label="-b/--blast_db: Database to blast against. Must provide either --blast_db or --reference_seqs_db for assignment with blast [default: None]"/>
                <param name="blast_e_value" type="float" optional="true" value="0.001" label="-e/--blast_e_value: Maximum e-value to record an assignment, only used for blast method [default: 0.001]"/>
            </when>
            <when value="rdp">
                <param name="confidence" type="float" optional="true" value="0.5" label="-c/--confidence: Minimum confidence to record an assignment, only used for rdp and mothur methods [default: 0.5]"/>
                <param name="rdp_max_memory" type="integer" optional="true" value="4000" label="--rdp_max_memory: Maximum memory allocation, in MB, for Java virtual machine when using the rdp method. Increase for large training sets [default: 4000]"/>
            </when>
            <when value="mothur">
                <param name="confidence" type="float" optional="true" value="0.5" label="-c/--confidence: Minimum confidence to record an assignment, only used for rdp and mothur methods [default: 0.5]"/>
            </when>
            <when value="uclust">
                <param name="min_consensus_fraction" type="float" optional="true" value="0.51" label="--min_consensus_fraction: Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query, only used for sortmerna and uclust methods [default: 0.51]"/>
                <param name="similarity" type="float" optional="true" value="0.9" label="--similarity: Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit, only used for sortmerna and uclust methods [default: 0.9]"/>
                <param name="uclust_max_accepts" type="integer" optional="true" value="3" label="--uclust_max_accepts: Number of database hits to consider when making an assignment, only used for uclust method [default: 3]"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Output names and wildcard patterns are kept exactly as published so that existing
             workflows keep resolving them.
             NOTE(review): from_work_dir is documented as a plain relative path; confirm that the
             wildcard patterns are resolved by the target Galaxy release. -->
        <data format="txt" from_work_dir="assign_taxonomy_output/*.log" label="tax_assignements.log" name="tax_assignements.log"/>
        <data format="txt" from_work_dir="assign_taxonomy_output/*.txt" label="tax_assignements.txt" name="tax_assignements.txt"/>
    </outputs>
    <tests>
        <!-- TODO: this test is an empty placeholder; add an input FASTA and expected outputs. -->
        <test> </test>
    </tests>
    <help>
<![CDATA[
**What it does**

Contains code for assigning taxonomy, using several techniques.

Given a set of sequences, this tool attempts to assign the taxonomy of each sequence. Currently the methods implemented are assignment with BLAST, the RDP classifier, RTAX, mothur, uclust, and sortmerna. The output of this step is an observation metadata mapping file of input sequence identifiers (1st column of output file) to taxonomy (2nd column) and quality score (3rd column). There may be method-specific information in subsequent columns.

Reference data sets and id-to-taxonomy maps for 16S rRNA sequences can be found in the Greengenes reference OTU builds. To get the latest build of the Greengenes OTUs (and other marker gene OTU collections), follow the "Resources" link from http://qiime.org. After downloading and unzipping you can use the following files as -r and -t, where <otus_dir> is the name of the new directory after unzipping the reference OTUs tgz file.

::

    -r <otus_dir>/rep_set/97_otus.fasta
    -t <otus_dir>/taxonomy/97_otu_taxonomy.txt

The consensus taxonomy assignment implemented here is the most detailed lineage description shared by 90% or more of the sequences within the OTU (this level of agreement can be adjusted by the user). The full lineage information for each sequence is one of the output files of the analysis. In addition, a conflict file records cases in which a phylum-level taxonomy assignment disagreement exists within an OTU (such instances are rare and can reflect sequence misclassification within the greengenes database).
]]>
    </help>
    <citations>
        <expand macro="citations" />
    </citations>
</tool>