Mercurial > repos > bebatut > qiime
diff assign_taxonomy.xml @ 0:c1bd0c560018 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime commit bcbe76277f3e60303faf826f8ce7f018bc663a9a-dirty
author | bebatut |
---|---|
date | Tue, 02 Feb 2016 05:50:37 -0500 |
parents | |
children |
line wrap: on
line diff
<tool id="qiime_assign_taxonomy" name="assign taxonomy" version="1.9.1galaxy1">

    <description>Assign taxonomy to each sequence</description>

    <macros>
        <import>macros.xml</import>
    </macros>

    <expand macro="requirements" />

    <!--
        Command template (Cheetah) that assembles the assign_taxonomy.py call.
        The flags that are emitted depend on the assignment method chosen in the
        "methodcond" conditional; only parameters of the selected method exist
        at render time, hence the single if/elif chain below.
        Every result is written to the assign_taxonomy_output directory inside
        the job working directory.
    -->
    <command>
<![CDATA[
        assign_taxonomy.py
            -i '$input_fasta_fp'

            ## Optional datasets: the flag is skipped when nothing was selected
            ## (the parameter then renders as the string 'None').
            #if str($id_to_taxonomy_fp) != 'None':
                -t '$id_to_taxonomy_fp'
            #end if

            #if str($reference_seqs_fp) != 'None':
                -r '$reference_seqs_fp'
            #end if

            ## The method select is mandatory and preselects uclust, so it always
            ## carries a usable value and can be passed through directly.
            -m $methodcond.assignment_method

            #if str($methodcond.assignment_method) == 'rtax':

                #if $methodcond.single_ok:
                    --single_ok
                #end if

                #if $methodcond.no_single_ok_generic:
                    --no_single_ok_generic
                #end if

                ## Regular expressions are single-quoted so the shell does not
                ## interpret backslashes or parentheses.
                #if str($methodcond.read_id_regex):
                    --read_id_regex='$methodcond.read_id_regex'
                #end if

                #if str($methodcond.amplicon_id_regex):
                    --amplicon_id_regex='$methodcond.amplicon_id_regex'
                #end if

                #if str($methodcond.header_id_regex):
                    --header_id_regex='$methodcond.header_id_regex'
                #end if

            #elif str($methodcond.assignment_method) == 'sortmerna':

                #if str($methodcond.sortmerna_db):
                    --sortmerna_db='$methodcond.sortmerna_db'
                #end if

                #if $methodcond.sortmerna_e_value:
                    --sortmerna_e_value=$methodcond.sortmerna_e_value
                #end if

                #if $methodcond.sortmerna_coverage:
                    --sortmerna_coverage=$methodcond.sortmerna_coverage
                #end if

                #if $methodcond.sortmerna_best_N_alignments:
                    --sortmerna_best_N_alignments=$methodcond.sortmerna_best_N_alignments
                #end if

                #if $methodcond.sortmerna_threads:
                    --sortmerna_threads=$methodcond.sortmerna_threads
                #end if

                #if $methodcond.min_consensus_fraction:
                    --min_consensus_fraction=$methodcond.min_consensus_fraction
                #end if

                #if $methodcond.similarity:
                    --similarity=$methodcond.similarity
                #end if

            #elif str($methodcond.assignment_method) == 'blast':

                ## NOTE(review): the selected dataset only switches the flag on;
                ## the value handed to -b is the shell variable BLAST_DB_NAME,
                ## which is not set anywhere in this file. Confirm that the job
                ## environment (or macros.xml) provides it.
                #if str($methodcond.blast_db) != 'None':
                    -b \$BLAST_DB_NAME
                #end if

                #if $methodcond.blast_e_value:
                    -e $methodcond.blast_e_value
                #end if

            #elif str($methodcond.assignment_method) == 'rdp':

                #if $methodcond.confidence:
                    -c $methodcond.confidence
                #end if

                #if $methodcond.rdp_max_memory:
                    --rdp_max_memory=$methodcond.rdp_max_memory
                #end if

            #elif str($methodcond.assignment_method) == 'mothur':

                #if $methodcond.confidence:
                    -c $methodcond.confidence
                #end if

            #elif str($methodcond.assignment_method) == 'uclust':

                #if $methodcond.min_consensus_fraction:
                    --min_consensus_fraction=$methodcond.min_consensus_fraction
                #end if

                #if $methodcond.similarity:
                    --similarity=$methodcond.similarity
                #end if

                #if $methodcond.uclust_max_accepts:
                    --uclust_max_accepts=$methodcond.uclust_max_accepts
                #end if

            #end if

            -o assign_taxonomy_output
]]>
    </command>

    <inputs>
        <param name="input_fasta_fp" type="data" optional="false"
               label="-i/--input_fasta_fp: path to the input fasta file"/>

        <!--
            Both reference inputs are optional datasets. When left empty the
            corresponding flag is omitted and the script falls back to its own
            configured default, so no site-specific path is declared here.
        -->
        <param name="id_to_taxonomy_fp" type="data" optional="true"
               label="-t/--id_to_taxonomy_fp: Path to tab-delimited file mapping sequences to assigned taxonomy. Each assigned taxonomy is provided as a semicolon-separated list. For assignment with rdp, each assigned taxonomy must be exactly 6 levels deep. [default: the taxonomy map configured for the QIIME installation]"/>
        <param name="reference_seqs_fp" type="data" optional="true"
               label="-r/--reference_seqs_fp: Path to reference sequences. For assignment with blast, these are used to generate a blast database. For assignment with rdp, they are used as training sequences for the classifier. [default: the reference sequences configured for the QIIME installation]"/>

        <conditional name="methodcond">
            <param name="assignment_method" type="select" optional="false"
                   label="-m/--assignment_method: Taxon assignment method, must be one of rdp, blast, rtax, mothur, uclust, sortmerna [default: uclust]">
                <option value="uclust" selected="true">uclust</option>
                <option value="rdp">rdp</option>
                <option value="blast">blast</option>
                <option value="rtax">rtax</option>
                <option value="mothur">mothur</option>
                <option value="sortmerna">sortmerna</option>
            </param>

            <when value="rtax">
                <param name="single_ok" type="boolean" checked="false"
                       label="--single_ok: When classifying paired ends, allow fallback to single-ended classification when the mate pair is lacking (used for RTAX only). [default: False]"/>
                <param name="no_single_ok_generic" type="boolean" checked="false"
                       label="--no_single_ok_generic: When classifying paired ends, do not allow fallback to single-ended classification when the mate pair is overly generic (used for RTAX only). [default: False]"/>

                <!--
                    The three regex fields start empty: an empty field omits the
                    flag so the script applies the default quoted in the label.
                    The sanitizer keeps every printable character (backslashes
                    included) except the single quote, because the command wraps
                    these values in single quotes.
                -->
                <param name="read_id_regex" type="text" optional="true"
                       label="--read_id_regex: Used to parse the result of OTU clustering, to get the read_1_id for each clusterID. The clusterID itself is assumed to be the first field, and is not captured by the regex. (used for RTAX only). [default: \S+\s+(\S+)]">
                    <sanitizer>
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                        </valid>
                    </sanitizer>
                </param>
                <param name="amplicon_id_regex" type="text" optional="true"
                       label="--amplicon_id_regex: Used to parse the result of split_libraries, to get the ampliconID for each read_1_id. Two groups capture read_1_id and ampliconID, respectively. (used for RTAX only). [default: (\S+)\s+(\S+?)\/]">
                    <sanitizer>
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                        </valid>
                    </sanitizer>
                </param>
                <param name="header_id_regex" type="text" optional="true"
                       label="--header_id_regex: Used to parse the result of split_libraries, to get the portion of the header that RTAX uses to match mate pairs. The default uses the amplicon ID, not including /1 or /3, as the primary key for the query sequences. Typically this regex will be the same as amplicon_id_regex, except that only the second group is captured. (used for RTAX only). [default: \S+\s+(\S+?)\/]">
                    <sanitizer>
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                        </valid>
                    </sanitizer>
                </param>
            </when>

            <when value="sortmerna">
                <param name="sortmerna_db" type="text" optional="true"
                       label="--sortmerna_db: Pre-existing database to search against when using sortmerna [default: None]"/>
                <param name="sortmerna_e_value" type="float" value="1.0" optional="true"
                       label="--sortmerna_e_value: Maximum E-value when clustering [default = 1.0]"/>
                <param name="sortmerna_coverage" type="float" value="0.9" min="0" max="1" optional="true"
                       label="--sortmerna_coverage: Mininum percent query coverage (of an alignment) to consider a hit, expressed as a fraction between 0 and 1 [default: 0.9]"/>
                <param name="sortmerna_best_N_alignments" type="integer" value="5" optional="true"
                       label="--sortmerna_best_N_alignments: This option specifies how many best alignments per read will be written [default: 5]"/>
                <param name="sortmerna_threads" type="integer" value="1" min="1" optional="true"
                       label="--sortmerna_threads: Specify number of threads to be used for sortmerna mapper which utilizes multithreading. [default: 1]"/>
                <param name="min_consensus_fraction" type="float" value="0.51" optional="true"
                       label="--min_consensus_fraction: Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query, only used for sortmerna and uclust methods [default: 0.51]"/>
                <param name="similarity" type="float" value="0.9" min="0" max="1" optional="true"
                       label="--similarity: Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit, only used for sortmerna and uclust methods [default: 0.9]"/>
            </when>

            <when value="blast">
                <param name="blast_db" type="data" optional="true"
                       label="-b/--blast_db: Database to blast against. Must provide either --blast_db or --reference_seqs_db for assignment with blast [default: None]"/>
                <param name="blast_e_value" type="float" value="0.001" optional="true"
                       label="-e/--blast_e_value: Maximum e-value to record an assignment, only used for blast method [default: 0.001]"/>
            </when>

            <when value="rdp">
                <param name="confidence" type="float" value="0.5" optional="true"
                       label="-c/--confidence: Minimum confidence to record an assignment, only used for rdp and mothur methods [default: 0.5]"/>
                <param name="rdp_max_memory" type="integer" value="4000" optional="true"
                       label="--rdp_max_memory: Maximum memory allocation, in MB, for Java virtual machine when using the rdp method. Increase for large training sets [default: 4000]"/>
            </when>

            <when value="mothur">
                <param name="confidence" type="float" value="0.5" optional="true"
                       label="-c/--confidence: Minimum confidence to record an assignment, only used for rdp and mothur methods [default: 0.5]"/>
            </when>

            <when value="uclust">
                <param name="min_consensus_fraction" type="float" value="0.51" optional="true"
                       label="--min_consensus_fraction: Minimum fraction of database hits that must have a specific taxonomic assignment to assign that taxonomy to a query, only used for sortmerna and uclust methods [default: 0.51]"/>
                <param name="similarity" type="float" value="0.9" min="0" max="1" optional="true"
                       label="--similarity: Minimum percent similarity (expressed as a fraction between 0 and 1) to consider a database match a hit, only used for sortmerna and uclust methods [default: 0.9]"/>
                <param name="uclust_max_accepts" type="integer" value="3" optional="true"
                       label="--uclust_max_accepts: Number of database hits to consider when making an assignment, only used for uclust method [default: 3]"/>
            </when>
        </conditional>
    </inputs>

    <!--
        NOTE(review): both outputs are located through a wildcard in
        from_work_dir. Confirm that the targeted Galaxy release expands
        wildcards there and that exactly one file matches each pattern.
    -->
    <outputs>
        <data name="tax_assignements.log" format="txt"
              from_work_dir="assign_taxonomy_output/*.log"
              label="tax_assignements.log"/>
        <data name="tax_assignements.txt" format="txt"
              from_work_dir="assign_taxonomy_output/*.txt"
              label="tax_assignements.txt"/>
    </outputs>

    <!-- TODO: the test case is still an empty placeholder; add inputs and expected outputs. -->
    <tests>
        <test>
        </test>
    </tests>

    <help><![CDATA[
**What it does**

Contains code for assigning taxonomy, using several techniques.

Given a set of sequences, this tool attempts to assign the taxonomy of each sequence.
Currently the methods implemented are assignment with BLAST, the RDP classifier,
RTAX, mothur, uclust, and sortmerna. The output of this step is an observation metadata
mapping file of input sequence identifiers (1st column of output file) to taxonomy
(2nd column) and quality score (3rd column). There may be method-specific information
in subsequent columns.

Reference data sets and id-to-taxonomy maps for 16S rRNA sequences can be found in
the Greengenes reference OTU builds. To get the latest build of the Greengenes OTUs
(and other marker gene OTU collections), follow the "Resources" link from http://qiime.org.
After downloading and unzipping you can use the following files as -r and -t, where
<otus_dir> is the name of the new directory after unzipping the reference OTUs tgz
file::

    -r <otus_dir>/rep_set/97_otus.fasta
    -t <otus_dir>/taxonomy/97_otu_taxonomy.txt

The consensus taxonomy assignment implemented here is the most detailed lineage
description shared by 90% or more of the sequences within the OTU (this level of
agreement can be adjusted by the user). The full lineage information for each
sequence is one of the output files of the analysis. In addition, a conflict file
records cases in which a phylum-level taxonomy assignment disagreement exists
within an OTU (such instances are rare and can reflect sequence misclassification
within the greengenes database).
    ]]>
    </help>

    <citations>
        <expand macro="citations" />
    </citations>
</tool>