Mercurial > repos > bebatut > qiime
view pick_otus.xml @ 0:c1bd0c560018 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime commit bcbe76277f3e60303faf826f8ce7f018bc663a9a-dirty
author | bebatut |
---|---|
date | Tue, 02 Feb 2016 05:50:37 -0500 |
parents | |
children |
line wrap: on
line source
<tool id="qiime_pick_otus" name="pick otus" version="1.9.1galaxy1">
    <!--
        Galaxy wrapper around QIIME's pick_otus.py.

        Layout:
          * <command>  builds the pick_otus.py call; method-specific flags are only
                       emitted for the methods whose <when> branch declares them.
          * <inputs>   one required sequence dataset, a conditional keyed on the
                       OTU picking method, and a few method-independent options.
          * <outputs>  collected from the "fastasplit" output directory.

        Requirements and the shared citation entries come from macros.xml.
    -->
    <description>OTU picking</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command>
<![CDATA[
        pick_otus.py
            -i '$input_seqs_filepath'
            -o fastasplit

            #if str($methode.otu_picking_method) != 'None'
                -m $methode.otu_picking_method
            #end if

            ## Reference-based methods
            #if str($methode.otu_picking_method) in ("uclust_ref", "usearch_ref")
                ## refseqs_fp is optional: only pass -r when a dataset was chosen,
                ## otherwise the literal string "None" would be used as a path.
                #if str($methode.refseqs_fp) != 'None'
                    -r '$methode.refseqs_fp'
                #end if
                #if $methode.suppress_new_clusters
                    -C
                #end if
            #end if

            #if str($methode.otu_picking_method) in ("uclust", "uclust_ref", "usearch", "usearch_ref", "sumaclust")
                #if $methode.similarity
                    -s $methode.similarity
                #end if
            #end if

            #if str($methode.otu_picking_method) == "sumaclust"
                #if $methode.sumaclust_exact
                    --sumaclust_exact
                #end if
            #end if

            #if str($methode.otu_picking_method) == "swarm"
                #if $methode.swarm_resolution
                    --swarm_resolution=$methode.swarm_resolution
                #end if
            #end if

            #if str($methode.otu_picking_method) in ("uclust", "uclust_ref", "usearch", "usearch_ref")
                #if $methode.enable_rev_strand_match
                    -z
                #end if
                #if str($methode.max_accepts)
                    --max_accepts='$methode.max_accepts'
                #end if
                #if str($methode.max_rejects)
                    --max_rejects='$methode.max_rejects'
                #end if
            #end if

            #if str($methode.otu_picking_method) in ("uclust", "uclust_ref")
                #if $methode.stepwords
                    --stepwords=$methode.stepwords
                #end if
                #if $methode.suppress_presort_by_abundance_uclust
                    -D
                #end if
            #end if

            #if str($methode.otu_picking_method) == "uclust"
                #if $methode.optimal_uclust
                    -A
                #end if
                #if $methode.exact_uclust
                    -E
                #end if
            #end if

            #if str($methode.otu_picking_method) == "usearch"
                #if $methode.percent_id_err
                    -j $methode.percent_id_err
                #end if
                #if $methode.abundance_skew
                    -a $methode.abundance_skew
                #end if
                #if str($methode.db_filepath) != 'None'
                    -f '$methode.db_filepath'
                #end if
                #if $methode.perc_id_blast
                    --perc_id_blast=$methode.perc_id_blast
                #end if
                #if $methode.suppress_de_novo_chimera_detection
                    -k
                #end if
                #if $methode.usearch_fast_cluster
                    --usearch_fast_cluster
                #end if
            #end if

            #if str($methode.otu_picking_method) in ("sumaclust", "swarm")
                #if str($methode.threads)
                    --threads='$methode.threads'
                #end if
            #end if

            ## Method-independent options
            #if $prefix_prefilter_length
                -n $prefix_prefilter_length
            #end if
            #if $prefix_length
                -p $prefix_length
            #end if
            #if $suffix_length
                -u $suffix_length
            #end if
            #if str($non_chimeras_retention)
                -F '$non_chimeras_retention'
            #end if
]]>
    </command>
    <inputs>
        <param name="input_seqs_filepath" type="data" optional="false" label="-i/--input_seqs_filepath: Path to input sequences file"/>

        <!-- The selected method decides which of the <when> parameter sets is shown. -->
        <conditional name="methode">
            <param name="otu_picking_method" type="select" optional="false" label="-m/--otu_picking_method: Method for picking OTUs. Valid choices are: sortmerna, mothur, trie, uclust_ref, usearch, usearch_ref, blast, usearch61, usearch61_ref, sumaclust, swarm, prefix_suffix, cdhit, uclust. The mothur method requires an input file of aligned sequences. usearch will enable the usearch quality filtering pipeline. [default: uclust]">
                <option value="uclust" selected="true">uclust</option>
                <option value="uclust_ref">uclust_ref</option>
                <option value="usearch">usearch</option>
                <option value="usearch_ref">usearch_ref</option>
                <option value="sumaclust">sumaclust</option>
                <option value="swarm">swarm</option>
            </param>

            <when value="uclust_ref">
                <!-- Optional: when no dataset is chosen, -r is omitted from the command. -->
                <param name="refseqs_fp" type="data" optional="true" label="-r/--refseqs_fp: Path to reference sequences to search against when using -m uclust_ref, -m usearch_ref [default: /home12/caparmor/bioinfo/softs/sources/Qiime/qiime-1.9.0/python_modules/lib/python2.7/site-packages/qiime_default_reference-0.1.1-py2.7.egg/qiime_default_reference/gg_13_8_otus/rep_set/97_otus.fasta]"/>
                <param name="similarity" type="float" optional="true" value="0.97" label="-s/--similarity: Sequence similarity threshold (for blast, cdhit, uclust, uclust_ref, usearch, usearch_ref, usearch61, usearch61_ref, sumaclust or sortmerna [default: 0.97]"/>
                <param name="enable_rev_strand_match" type="boolean" checked="false" label="-z/--enable_rev_strand_match: Enable reverse strand matching for uclust, uclust_ref, usearch, usearch_ref, usearch61, or usearch61_ref otu picking, will double the amount of memory used. [default: False]"/>
                <param name="suppress_presort_by_abundance_uclust" type="boolean" checked="false" label="-D/--suppress_presort_by_abundance_uclust: Suppress presorting of sequences by abundance when picking OTUs with uclust or uclust_ref [default: False]"/>
                <param name="suppress_new_clusters" type="boolean" checked="false" label="-C/--suppress_new_clusters: Suppress creation of new clusters using seqs that don't match reference when using -m uclust_ref, -m usearch61_ref, or -m usearch_ref [default: False]"/>
                <param name="max_accepts" type="text" optional="true" value="default" label="--max_accepts: max_accepts value to uclust, uclust_ref, usearch61, and usearch61_ref. By default, will use value suggested by method (uclust: 1, usearch61: 1) [default: default]"/>
                <param name="max_rejects" type="text" optional="true" value="default" label="--max_rejects: max_rejects value for uclust, uclust_ref, usearch61, and usearch61_ref. With default settings, will use value recommended by clustering method used (uclust: 8, usearch61: 8 for usearch_fast_cluster option, 32 for reference and smallmem options) [default: default]"/>
                <param name="stepwords" type="integer" optional="true" value="8" label="--stepwords: stepwords value to uclust and uclust_ref [default: 8]"/>
            </when>

            <when value="usearch_ref">
                <!-- Optional: when no dataset is chosen, -r is omitted from the command. -->
                <param name="refseqs_fp" type="data" optional="true" label="-r/--refseqs_fp: Path to reference sequences to search against when using -m blast, -m sortmerna, -m uclust_ref, -m usearch_ref, or -m usearch61_ref [default: /home12/caparmor/bioinfo/softs/sources/Qiime/qiime-1.9.0/python_modules/lib/python2.7/site-packages/qiime_default_reference-0.1.1-py2.7.egg/qiime_default_reference/gg_13_8_otus/rep_set/97_otus.fasta]"/>
                <param name="similarity" type="float" optional="true" value="0.97" label="-s/--similarity: Sequence similarity threshold (for blast, cdhit, uclust, uclust_ref, usearch, usearch_ref, usearch61, usearch61_ref, sumaclust or sortmerna [default: 0.97]"/>
                <param name="enable_rev_strand_match" type="boolean" checked="false" label="-z/--enable_rev_strand_match: Enable reverse strand matching for uclust, uclust_ref, usearch, usearch_ref, usearch61, or usearch61_ref otu picking, will double the amount of memory used. [default: False]"/>
                <param name="suppress_new_clusters" type="boolean" checked="false" label="-C/--suppress_new_clusters: Suppress creation of new clusters using seqs that don't match reference when using -m uclust_ref, -m usearch61_ref, or -m usearch_ref [default: False]"/>
                <param name="max_accepts" type="text" optional="true" value="default" label="--max_accepts: max_accepts value to uclust, uclust_ref, usearch61, and usearch61_ref. By default, will use value suggested by method (uclust: 1, usearch61: 1) [default: default]"/>
                <param name="max_rejects" type="text" optional="true" value="default" label="--max_rejects: max_rejects value for uclust, uclust_ref, usearch61, and usearch61_ref. With default settings, will use value recommended by clustering method used (uclust: 8, usearch61: 8 for usearch_fast_cluster option, 32 for reference and smallmem options) [default: default]"/>
            </when>

            <when value="usearch">
                <param name="similarity" type="float" optional="true" value="0.97" label="-s/--similarity: Sequence similarity threshold (for blast, cdhit, uclust, uclust_ref, usearch, usearch_ref, usearch61, usearch61_ref, sumaclust or sortmerna [default: 0.97]"/>
                <param name="enable_rev_strand_match" type="boolean" checked="false" label="-z/--enable_rev_strand_match: Enable reverse strand matching for uclust, uclust_ref, usearch, usearch_ref, usearch61, or usearch61_ref otu picking, will double the amount of memory used. [default: False]"/>
                <param name="max_accepts" type="text" optional="true" value="default" label="--max_accepts: max_accepts value to uclust, uclust_ref, usearch61, and usearch61_ref. By default, will use value suggested by method (uclust: 1, usearch61: 1) [default: default]"/>
                <param name="max_rejects" type="text" optional="true" value="default" label="--max_rejects: max_rejects value for uclust, uclust_ref, usearch61, and usearch61_ref. With default settings, will use value recommended by clustering method used (uclust: 8, usearch61: 8 for usearch_fast_cluster option, 32 for reference and smallmem options) [default: default]"/>
                <param name="percent_id_err" type="float" optional="true" value="0.97" label="-j/--percent_id_err: Percent identity threshold for cluster error detection with usearch, expressed as a fraction between 0 and 1. [default: 0.97]"/>
                <param name="abundance_skew" type="float" optional="true" value="2.0" label="-a/--abundance_skew: Abundance skew setting for de novo chimera detection with usearch. [default: 2.0]"/>
                <param name="db_filepath" type="data" optional="true" label="-f/--db_filepath: Reference database of fasta sequences for reference based chimera detection with usearch. [default: None]"/>
                <param name="perc_id_blast" type="float" optional="true" value="0.97" label="--perc_id_blast: Percent ID for mapping OTUs created by usearch back to original sequence IDs [default: 0.97]"/>
                <param name="suppress_de_novo_chimera_detection" type="boolean" checked="false" label="-k/--suppress_de_novo_chimera_detection: Suppress de novo chimera detection in usearch. [default: False]"/>
                <param name="usearch_fast_cluster" type="boolean" checked="false" label="--usearch_fast_cluster: Use fast clustering option for usearch or usearch61_ref with new clusters. --enable_rev_strand_match can not be enabled with this option, and the only valid option for usearch61_sort_method is 'length'. This option uses more memory than the default option for de novo clustering. [default: False]"/>
            </when>

            <when value="sumaclust">
                <param name="similarity" type="float" optional="true" value="0.97" label="-s/--similarity: Sequence similarity threshold (for blast, cdhit, uclust, uclust_ref, usearch, usearch_ref, usearch61, usearch61_ref, sumaclust or sortmerna [default: 0.97]"/>
                <param name="sumaclust_exact" type="boolean" checked="false" label="--sumaclust_exact: A sequence is assigned to the best matching seed rather than the first matching seed passing the similarity threshold [default: False]"/>
                <param name="threads" type="text" optional="true" value="1" label="--threads: Specify number of threads (1 thread per core) to be used for usearch61, sortmerna, sumaclust and swarm commands that utilize multithreading. [default: 1]"/>
            </when>

            <when value="swarm">
                <param name="swarm_resolution" type="integer" optional="true" value="1" label="--swarm_resolution: Maximum number of differences allowed between two amplicons, meaning that two amplicons will be grouped if they have integer (or less) differences (see Swarm manual at https://github.com/torognes/swarm for more details). [default: 1]"/>
                <param name="threads" type="text" optional="true" value="1" label="--threads: Specify number of threads (1 thread per core) to be used for usearch61, sortmerna, sumaclust and swarm commands that utilize multithreading. [default: 1]"/>
            </when>

            <when value="uclust">
                <param name="similarity" type="float" optional="true" value="0.97" label="-s/--similarity: Sequence similarity threshold (for blast, cdhit, uclust, uclust_ref, usearch, usearch_ref, usearch61, usearch61_ref, sumaclust or sortmerna [default: 0.97]"/>
                <param name="enable_rev_strand_match" type="boolean" checked="false" label="-z/--enable_rev_strand_match: Enable reverse strand matching for uclust, uclust_ref, usearch, usearch_ref, usearch61, or usearch61_ref otu picking, will double the amount of memory used. [default: False]"/>
                <param name="optimal_uclust" type="boolean" checked="false" label="-A/--optimal_uclust: Pass the --optimal flag to uclust for uclust otu picking. [default: False]"/>
                <param name="suppress_presort_by_abundance_uclust" type="boolean" checked="false" label="-D/--suppress_presort_by_abundance_uclust: Suppress presorting of sequences by abundance when picking OTUs with uclust or uclust_ref [default: False]"/>
                <param name="max_accepts" type="text" optional="true" value="default" label="--max_accepts: max_accepts value to uclust, uclust_ref, usearch61, and usearch61_ref. By default, will use value suggested by method (uclust: 1, usearch61: 1) [default: default]"/>
                <param name="max_rejects" type="text" optional="true" value="default" label="--max_rejects: max_rejects value for uclust, uclust_ref, usearch61, and usearch61_ref. With default settings, will use value recommended by clustering method used (uclust: 8, usearch61: 8 for usearch_fast_cluster option, 32 for reference and smallmem options) [default: default]"/>
                <param name="stepwords" type="integer" optional="true" value="8" label="--stepwords: stepwords value to uclust and uclust_ref [default: 8]"/>
                <param name="exact_uclust" type="boolean" checked="false" label="-E/--exact_uclust: Pass the --exact flag to uclust for uclust otu picking. [default: False]"/>
            </when>
        </conditional>

        <!-- Method-independent options. An empty value means the flag is not passed. -->
        <param name="prefix_prefilter_length" type="integer" optional="true" value="" label="-n/--prefix_prefilter_length: Prefilter data so seqs with identical first prefix_prefilter_length are automatically grouped into a single OTU. This is useful for large sequence collections where OTU picking doesn't scale well [default: None; 100 is a good value]"/>
        <param name="prefix_length" type="integer" optional="true" value="50" label="-p/--prefix_length: Prefix length when using the prefix_suffix otu picker; WARNING: CURRENTLY DIFFERENT FROM prefix_prefilter_length (-n)! [default: 50]"/>
        <param name="suffix_length" type="integer" optional="true" value="50" label="-u/--suffix_length: Suffix length when using the prefix_suffix otu picker [default: 50]"/>
        <param name="non_chimeras_retention" type="text" optional="true" value="union" label="-F/--non_chimeras_retention: Selects subsets of sequences detected as non-chimeras to retain after de novo and reference based chimera detection. Options are intersection or union. union will retain sequences that are flagged as non-chimeric from either filter, while intersection will retain only those sequences that are flagged as non-chimeras from both detection methods. [default: union]"/>
    </inputs>
    <outputs>
        <!-- Files are picked up from the directory given to "-o" in the command. -->
        <data format="txt" from_work_dir="fastasplit/*_otus.txt" name="pick_otus.txt" label="pick_otus.txt"/>
        <data format="txt" from_work_dir="fastasplit/*_otus.log" name="pick_otus.log" label="pick_otus.log"/>
        <data format="txt" from_work_dir="fastasplit/*_failures.txt" name="pick_otus_failures.txt" label="pick_otus_failures.txt"/>
    </outputs>
    <tests>
        <!-- TODO(review): placeholder only; no inputs or expected outputs are defined yet. -->
        <test>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

The OTU picking step assigns similar sequences to operational taxonomic units, or OTUs, by clustering sequences based on a user-defined similarity threshold. Sequences which are similar at or above the threshold level are taken to represent the presence of a taxonomic unit (e.g., a genus, when the similarity threshold is set at 0.94) in the sequence collection.

Currently, the following clustering methods have been implemented in QIIME:

1. uclust, creates "seeds" of sequences which generate clusters based on percent identity.
2. uclust_ref, as uclust, but takes a reference database to use as seeds. New clusters can be toggled on or off.
3. usearch, creates "seeds" of sequences which generate clusters based on percent identity, filters low abundance clusters, performs de novo and reference based chimera detection.
4. usearch_ref, as usearch, but takes a reference database to use as seeds. New clusters can be toggled on or off.
5. sumaclust, creates "seeds" of sequences which generate clusters based on similarity threshold.
6. swarm, creates "seeds" of sequences which generate clusters based on a resolution threshold.

Chimera checking with usearch 6.X is implemented in identify_chimeric_seqs.py. Chimera checking should be done first with usearch 6.X, and the filtered resulting fasta file can then be clustered.

The primary inputs for pick_otus.py are:

1. A FASTA file containing sequences to be clustered
2. An OTU threshold (default is 0.97, roughly corresponding to species-level OTUs);
3. The method to be applied for clustering sequences into OTUs.

pick_otus.py takes a standard fasta file as input.

The output consists of two files (i.e. seqs_otus.txt and seqs_otus.log). The .txt file is composed of tab-delimited lines, where the first field on each line corresponds to an (arbitrary) cluster identifier, and the remaining fields correspond to sequence identifiers assigned to that cluster. Sequence identifiers correspond to those provided in the input FASTA file. Usearch (i.e. usearch quality filter) can additionally have log files for each intermediate call to usearch.

Example lines from the resulting .txt file:

= ==== ==== ====
0 seq1 seq5
1 seq2
2 seq3
3 seq4 seq6 seq7
= ==== ==== ====

This result implies that four clusters were created based on 7 input sequences. The first cluster (cluster id 0) contains two sequences, sequence ids seq1 and seq5; the second cluster (cluster id 1) contains one sequence, sequence id seq2; the third cluster (cluster id 2) contains one sequence, sequence id seq3, and the final cluster (cluster id 3) contains three sequences, sequence ids seq4, seq6, and seq7.

The resulting .log file contains a list of parameters passed to the pick_otus.py script along with the output location of the resulting .txt file.
    ]]></help>
    <citations>
        <expand macro="citations" />
        <citation type="doi">10.1093/bioinformatics/btv231</citation>
        <citation type="doi">10.1093/bioinformatics/btq461</citation>
        <citation type="doi">10.1093/bioinformatics/bts611</citation>
        <citation type="doi">10.7287/peerj.preprints.386v1/supp-1</citation>
    </citations>
</tool>