diff pick_open_reference_otus.xml @ 0:c1bd0c560018 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime commit bcbe76277f3e60303faf826f8ce7f018bc663a9a-dirty
author bebatut
date Tue, 02 Feb 2016 05:50:37 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pick_open_reference_otus.xml	Tue Feb 02 05:50:37 2016 -0500
@@ -0,0 +1,295 @@
+<tool id="pick_open_reference_otus" name="Perform open-reference OTU picking" 
+    version="1.9.1">
+    
+    <description></description>
+    
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="requirements" />
+
+    <!-- Command used to report the version of the wrapped QIIME script. -->
+    <version_command><![CDATA[pick_open_reference_otus.py --version]]>
+    </version_command>
+
+    <command><![CDATA[
+        ## Run the QIIME workflow; all result files are written below ./otus
+        pick_open_reference_otus.py
+            -i '$input_seq_files'
+            -o otus
+            -m $otu_picking_method
+            ## Reference: a history dataset, or column 2 of the data table row
+            ## whose column 0 equals the selected entry
+            #if str( $reference_sequences.reference_sequences_selector ) == 'history'
+                #set $ref = $reference_sequences.history_database
+            #else:
+                #set $data_table = dict([(_[0], _[2]) for _ in $reference_sequences.cached_database.input.options.tool_data_table.data])
+                #set $db = $reference_sequences.cached_database.value
+                #set $ref = $data_table[$db]
+            #end if
+            -r '$ref'
+
+            ## Optional separate reference set handed to the prefilter
+            #if $prefilter_refseqs_fp.difference_test
+                #if str( $prefilter_refseqs_fp.prefilter_reference_sequences.reference_sequences_selector ) == 'history'
+                    #set $ref = $prefilter_refseqs_fp.prefilter_reference_sequences.history_database
+                #else:
+                    #set $data_table = dict([(_[0], _[2]) for _ in $prefilter_refseqs_fp.prefilter_reference_sequences.cached_database.input.options.tool_data_table.data])
+                    #set $db = $prefilter_refseqs_fp.prefilter_reference_sequences.cached_database.value
+                    #set $ref = $data_table[$db]
+                #end if
+                --prefilter_refseqs_fp '$ref'
+            #end if
+
+            ## Quoted because it is free text entered by the user
+            -n '$new_ref_set_id'
+            $parallel
+            -O "\${GALAXY_SLOTS:-4}"
+            -s $percent_subsample
+            --prefilter_percent_id $prefilter_percent_id
+
+            ## Optional datasets are passed only when set (an unset one renders as None)
+            #if str($step1_otu_map_fp) != 'None':
+                --step1_otu_map_fp '$step1_otu_map_fp'
+            #end if
+            #if str($step1_failures_fasta_fp) != 'None':
+                --step1_failures_fasta_fp '$step1_failures_fasta_fp'
+            #end if
+            --minimum_failure_threshold $minimum_failure_threshold
+            $suppress_step4
+            --min_otu_size $min_otu_size
+            $suppress_taxonomy_assignment
+            $suppress_align_and_tree
+
+        ## QIIME embeds the minimum OTU size in its output file names (mc2, mc3, ...)
+        #set $otu_table_base = 'otus/otu_table_mc' + str($min_otu_size)
+        #if str($min_otu_size) != '2'
+            ## The OTU map output is collected from the fixed name final_otu_map_mc2.txt
+            && cp 'otus/final_otu_map_mc${min_otu_size}.txt' otus/final_otu_map_mc2.txt
+        #end if
+
+        && biom convert
+            -i '${otu_table_base}.biom'
+            -o '$output_otu_table'
+            --to-tsv --header-key taxonomy
+
+        #if not $suppress_taxonomy_assignment
+            && biom convert
+                -i '${otu_table_base}_w_tax.biom'
+                -o '$output_otu_table_taxonomy'
+                --to-tsv --header-key taxonomy
+        #end if
+
+        #if not $suppress_align_and_tree
+            ## The _w_tax infix is present only when taxonomy was assigned
+            #if $suppress_taxonomy_assignment
+                #set $tax_tag = ''
+            #else:
+                #set $tax_tag = '_w_tax'
+            #end if
+            && biom convert
+                -i '${otu_table_base}${tax_tag}_no_pynast_failures.biom'
+                -o '$output_otu_table_no_failures'
+                --to-tsv --header-key taxonomy
+        #end if
+        ]]>
+    </command>
+
+    <inputs>
+        <param name="input_seq_files" type="data" format="fasta,fastq"
+            label="Input sequence files" multiple="True" help="(-i/--input_fps)"/>
+
+        <param name="otu_picking_method" label="The OTU picking method to use for
+            reference and de novo steps" type="select"
+            help="-m/--otu_picking_method">
+            <option value="uclust" selected="True">uclust</option>
+            <option value="usearch61">usearch61</option>
+            <option value="sortmerna_sumaclust">sortmerna_sumaclust</option>
+        </param>
+        <!-- Reference set: an entry of the qiime_reference_db data table or a FASTA dataset -->
+        <conditional name="reference_sequences">
+            <param name="reference_sequences_selector" type="select"
+                label="Reference sequences to query"
+                help="(-r, --reference_fp)">
+                <option value="cached" selected="True">Public databases</option>
+                <option value="history">Databases from your history</option>
+            </param>
+            <when value="cached">
+                <param name="cached_database" label="QIIME databases of reference
+                    sequences" type="select">
+                    <options from_data_table="qiime_reference_db" />
+                </param>
+            </when>
+            <when value="history">
+                <param name="history_database" type="data" format="fasta"
+                    label="Reference databases"/>
+            </when>
+        </conditional>
+        <!-- Optional second reference set, offered only when difference_test is checked -->
+        <conditional name="prefilter_refseqs_fp">
+            <param name="difference_test" type="boolean" label="Use different
+                reference sequences for the prefilter?" checked="False" truevalue="true"
+                falsevalue="false" help="(--prefilter_refseqs_fp)" />
+            <when value="true">
+                <conditional name="prefilter_reference_sequences">
+                    <param name="reference_sequences_selector" type="select"
+                        label="Reference sequences to query" >
+                        <option value="cached" selected="True">Public databases</option>
+                        <option value="history">Databases from your history</option>
+                    </param>
+                    <when value="cached">
+                        <param name="cached_database" label="QIIME databases of reference
+                            sequences" type="select">
+                            <options from_data_table="qiime_reference_db" />
+                        </param>
+                    </when>
+                    <when value="history">
+                        <param name="history_database" type="data" format="fasta"
+                            label="Reference databases"/>
+                    </when>
+                </conditional>
+            </when>
+            <when value="false" />
+        </conditional>
+
+        <param name="new_ref_set_id" type="text" value="New" label="Unique
+            identifier for OTUs that get created in this ref set"
+            help="This is useful to support combining of reference sets
+            (-n/--new_ref_set_id)"/>
+
+        <param name="parallel" type="boolean" label="Run in parallel where
+            available?" truevalue="--parallel" falsevalue="" checked="False"
+            help="(-a, --parallel)" />
+
+        <param name="percent_subsample" type="float" value="0.001" min="0" max="1"
+            label="Percent of failure sequences to include in the subsample to
+            cluster de novo" help="Large numbers should give more comprehensive
+            results but will be slower (-s/--percent_subsample)"/>
+
+        <param name="prefilter_percent_id" type="float" value="0.0" min="0" max="1"
+            label="Percent identity at which sequences are pre-clustered against the reference"
+            help="Any reads which fail to hit are discarded (a quality filter)
+            (--prefilter_percent_id)"/>
+        <!-- Optional results of an earlier run of this workflow, reused instead of rebuilt -->
+        <param name="step1_otu_map_fp" type="data" format="txt" label="Reference
+            OTU picking OTU map to avoid rebuilding if one has already been built
+            (optional)" optional="True" help="This must be an OTU map generated
+            by this workflow, not (for example) by pick_closed_reference_otus
+            (--step1_otu_map_fp)" />
+
+        <param name="step1_failures_fasta_fp" type="data" format="fasta"
+            label="Reference OTU picking failures fasta filepath, to avoid
+            rebuilding if one has already been built (optional)" optional="True"
+            help="This must be a failures file generated by this workflow, not
+            (for example) by pick_closed_reference_otus (--step1_failures_fasta_fp)" />
+
+        <param name="minimum_failure_threshold" type="integer" value="100000"
+            label="Minimum number of sequences that must fail to hit the reference
+            for subsampling to be performed" help="If fewer than this number of
+            sequences fail to hit the reference, the de novo clustering step will
+            run serially rather than invoking the subsampled open reference approach
+            to improve performance (--minimum_failure_threshold)"/>
+
+        <param name="suppress_step4" type="boolean" label="Suppress the final de
+            novo OTU picking step?" truevalue="--suppress_step4" falsevalue=""
+            checked="False" help="It may be necessary for extremely large data
+            sets (--suppress_step4)" />
+
+        <param name="min_otu_size" type="integer" value="2" label="Minimum otu
+            size to retain the OTU" help="(--min_otu_size)"/>
+
+        <param name="suppress_taxonomy_assignment" type="boolean"
+            label="Skip the taxonomy assignment step?"
+            truevalue="--suppress_taxonomy_assignment" falsevalue=""
+            checked="False" help="It results in an OTU table without taxonomy
+            (--suppress_taxonomy_assignment)" />
+
+        <param name="suppress_align_and_tree" type="boolean"
+            label="Skip the sequence alignment and tree-building steps?"
+            truevalue="--suppress_align_and_tree" falsevalue=""
+            checked="False" help="(--suppress_align_and_tree)" />
+    </inputs>
+
+    <outputs>
+        <data name="output_rep_set" format="fasta"
+            from_work_dir="otus/rep_set.fna"
+            label="${tool.name} on ${on_string}: OTU representative sequences"/>
+        <data name="output_ref_set" format="fasta"
+            from_work_dir="otus/new_refseqs.fna"
+            label="${tool.name} on ${on_string}: New reference sequences (OTU + input reference sequences)"/>
+        <!-- The three OTU tables are written by the biom convert calls in the command -->
+        <data name="output_otu_table" format="tsv"
+            label="${tool.name} on ${on_string}: OTU table"/>
+        <data name="output_otu_table_taxonomy" format="tsv"
+            label="${tool.name} on ${on_string}: OTU table with taxonomy">
+            <filter>suppress_taxonomy_assignment is False</filter>
+        </data>
+        <data name="output_otu_table_no_failures" format="tsv"
+            label="${tool.name} on ${on_string}: OTU table without sequences that failed to align">
+            <filter>suppress_align_and_tree is False</filter>
+        </data>
+        <data name="output_otu_map" format="txt"
+            from_work_dir="otus/final_otu_map_mc2.txt"
+            label="${tool.name} on ${on_string}: OTU map"/>
+        <!-- Offered only when the alignment and tree-building steps are not skipped -->
+        <data name="output_rep_set_tree" format="txt"
+            from_work_dir="otus/rep_set.tre"
+            label="${tool.name} on ${on_string}: representative set tree">
+            <filter>suppress_align_and_tree is False</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <!-- One run over a FASTA input with the reference set supplied from the
+             history; the suppress options are given their empty falsevalue. -->
+        <test>
+            <!-- sequences and OTU picking method -->
+            <param name="input_seq_files" value="split_fastq_libraries_sequences.fasta"/>
+            <param name="otu_picking_method" value="uclust"/>
+
+            <!-- reference database -->
+            <param name="reference_sequences_selector" value="history"/>
+            <param name="history_database" value="gg_13_8_97_otus.fasta"/>
+            <param name="difference_test" value="false"/>
+
+            <!-- workflow options -->
+            <param name="new_ref_set_id" value="New"/>
+            <param name="parallel" value="--parallel"/>
+            <param name="percent_subsample" value="0.001"/>
+            <param name="prefilter_percent_id" value="0.0"/>
+            <param name="minimum_failure_threshold" value="100000"/>
+            <param name="suppress_step4" value=""/>
+            <param name="min_otu_size" value="2"/>
+            <param name="suppress_taxonomy_assignment" value=""/>
+            <param name="suppress_align_and_tree" value=""/>
+
+            <!-- expected result files -->
+            <output name="output_rep_set" value="pick_open_reference_otus_representative_sequences.fasta"/>
+            <output name="output_ref_set" value="pick_open_reference_otus_reference_sequences.fasta"/>
+            <output name="output_otu_table" value="pick_open_reference_otus_otu_table.txt"/>
+            <output name="output_otu_table_taxonomy" value="pick_open_reference_otus_otu_table_with_taxo.txt"/>
+            <output name="output_otu_map" value="pick_open_reference_otus_otu_map.txt"/>
+            <output name="output_otu_table_no_failures" value="pick_open_reference_otus_otu_table_with_taxo_without_pynast_failure.txt"/>
+            <output name="output_rep_set_tree" value="pick_open_reference_otus_representative_set_tree.txt"/>
+        </test>
+    </tests>
+
+	<help><![CDATA[
+
+**What it does**
+
+This tool performs open-reference picking of operational taxonomic units (OTUs) from sequences.
+
+This script is broken down into 4 possible OTU picking steps, and 2 steps
+involving the creation of OTU tables and trees. 
+
+More information about this tool is available in the
+`QIIME documentation <http://qiime.org/scripts/pick_open_reference_otus.html>`_.
+]]>
+    </help>
+
+    <citations>
+        <expand macro="citations" />
+    </citations>
+</tool>