diff stacks_kmerfilter.xml @ 5:2277b0b50371 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit 4e87a14a5479800df9675c1cbcdbe1b11f63653b-dirty
author matthias
date Wed, 27 Feb 2019 09:50:00 -0500
parents 7aec316566cb
children e0b1e96c4032
line wrap: on
line diff
--- a/stacks_kmerfilter.xml	Fri Jan 04 03:30:16 2019 -0500
+++ b/stacks_kmerfilter.xml	Wed Feb 27 09:50:00 2019 -0500
@@ -1,49 +1,30 @@
-<tool id="stacks2_kmerfilter" name="Stacks2: kmer filter" version="@STACKS_VERSION@+galaxy@WRAPPER_VERSION@">
+<tool id="stacks2_kmerfilter" name="Stacks2: kmer filter" profile="@PROFILE@" version="@STACKS_VERSION@+galaxy@WRAPPER_VERSION@">
 <description>Identify PCR clones</description>
     <macros>
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements"/>
-    <expand macro="stdio"/>
     <expand macro="version_cmd"/>
-    <command><![CDATA[
-        #if $data_type.dt_select == "single"
-            #if $data_type.fname.is_of_type('fastqsanger')
-                #set $ext = ".fq"
-                #set inputype = "fastq"
-            #else
-                #set $ext = ".fq.gz"
-                #set inputype = "gzfastq"
-            #end if
-            ln -s '$data_type.fname' R1$ext &&
-        #elif $data_type.dt_select == "pair"
-            #if $data_type.fwd.is_of_type('fastqsanger')
-                #set $ext = ".fq"
-                #set inputype = "fastq"
-            #else
-                #set $ext = ".fq.gz"
-                #set inputype = "gzfastq"
-            #end if
-            ln -s '$data_type.fwd' R1$ext &&
-            ln -s '$data_type.rev' R2$ext &&
-        #end if
+    <command detect_errors="aggressive"><![CDATA[
+@FASTQ_INPUT_FUNCTIONS@
+
+mkdir stacks_inputs stacks_outputs &&
+
+#set ($link_command, $fwd_path, $rev_path, $inputype) = $fastq_input_batch($input_type.fqinputs, $input_type.input_type_select)
+$link_command
 
-        mkdir clone_outputs
-
-        &&
-
-        kmer_filter
-            #if $data_type.dt_select == 'single':
-                -f R1$ext
-            #else
-                -1 R1$ext
-                -2 R2$ext
-            #end if
+kmer_filter
+#if $input_type.input_type_select == 'single':
+    -f '$fwd_path'
+#else
+    -1 '$fwd_path'
+    -2 '$rev_path'
+#end if
 ## TODO    $options_kmer_char.read_k_freq
-            -i $inputype
-            -o clone_outputs
-            $capture
-            -y gzfastq
+-i $inputype
+-o stacks_outputs
+$capture
+-y fastq
 $options_filtering.rare
 $options_filtering.abundant
 --k_len $options_filtering.k_len
@@ -62,58 +43,73 @@
 #end if
 $options_kmer_char.k_dist
 #if $options_kmer_char.k_dist
-    | sed 's/KmerFrequency/# KmerFrequency/' $kfreqdist> 
+    | sed 's/KmerFrequency/# KmerFrequency/' > $kfreqdist
 #end if
 
+## move outputs such that Galaxy can find them
+## if filtering (rare/abundant) is on, the result is expected as stacks_outputs/*.fil.fastq
+## if normalization is on, the result is expected as stacks_outputs/*.norm.fastq
+## if both are active then both files are created, but only the *.norm.fastq file is moved
+#if str($options_filtering.rare)!="" or str($options_filtering.abundant)!="" or str($options_normalization.normalize)!="":
+    #if str($options_normalization.normalize)!="":
+        #set infix="norm"
+    #else
+        #set infix="fil"
+    #end if
+    #if $capture:
+        #if $input_type.input_type_select == "single"
+            && mv stacks_outputs/*.discards.fastq '$discarded'
+        #else
+            && mv stacks_outputs/*.1.discards.fastq '$discarded_pair.forward'
+            && mv stacks_outputs/*.2.discards.fastq '$discarded_pair.reverse'
+        #end if
+    #end if
+    #if $input_type.input_type_select == "single"
+        && mv stacks_outputs/*.${infix}.fastq '$clean'
+    #else
+        && mv stacks_outputs/*.1.${infix}.fastq '$clean_pair.forward'
+        && mv stacks_outputs/*.2.${infix}.fastq '$clean_pair.reverse'
+    #end if
+#end if
 
     ]]></command>
     <inputs>
-        <conditional name="data_type">
-            <param name="dt_select" type="select" label="Single or Paired-end">
-                <option value="single">Single</option>
-                <option value="pair">Pair</option>
-            </param>
-            <when value="single">
-                <param name="fname" type="data" format="fastqsanger,fastqsanger.gz" label="FASTQ" />
-            </when>
-            <when value="pair">
-                <param name="fwd" type="data" format="fastqsanger,fastqsanger.gz" label="Forward FASTQ" />
-                <param name="rev" type="data" format="fastqsanger,fastqsanger.gz" label="Reverse FASTQ" />
-            </when>
-        </conditional>
+        <expand macro="fastq_input_bc"/>
         <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file" />
         <section name="options_filtering" title="Filtering options" expanded="False">
-            <param argument="--rare" type="boolean" checked="false" truevalue="--rare" falsevalue="" label="turn on filtering based on rare k-mers" />
-            <param argument="--abundant" type="boolean" checked="false" truevalue="--abundant" falsevalue="" label="turn on filtering based on abundant k-mers" />
-            <param argument="--k_len" type="integer" value="15" label="k-mer size" />
+            <param argument="--rare" type="boolean" checked="false" truevalue="--rare" falsevalue="" label="Turn on filtering based on rare k-mers" />
+            <param argument="--abundant" type="boolean" checked="false" truevalue="--abundant" falsevalue="" label="Turn on filtering based on abundant k-mers" />
+            <param argument="--k_len" type="integer" value="15" label="K-mer size" />
         </section>
         <section name="options_advanced_filtering" title="Advanced fitering options" expanded="False">
-            <param argument="--max_k_freq" type="integer" value="20000" label="number of times a kmer must occur to be considered abundant" />
-            <param argument="--min_lim" type="integer" value="" optional="true" label="number of rare kmers occuring in a row required to discard a read" help="(default: 80% of the k-mer length)." />
-            <param argument="--max_lim" type="integer" value="" optional="true" label="number of abundant kmers required to discard a read" help="(default: 80% of the k-mers in a read)" />
+            <param argument="--max_k_freq" type="integer" value="20000" label="Number of times a kmer must occur to be considered abundant" />
+            <param argument="--min_lim" type="integer" value="" optional="true" label="Number of rare kmers occuring in a row required to discard a read" help="(default: 80% of the k-mer length)." />
+            <param argument="--max_lim" type="integer" value="" optional="true" label="Number of abundant kmers required to discard a read" help="(default: 80% of the k-mers in a read)" />
         </section>
         <section name="options_normalization" title="Normalization options" expanded="False">
-            <param argument="--normalize" type="integer" value="" optional="true" label="normalize read depth according to k-mer coverage" />
+            <param argument="--normalize" type="integer" value="" optional="true" label="Normalize read depth according to k-mer coverage" />
         </section>
         <section name="options_kmer_char" title="Characterizing K-mers options" expanded="False">
-            <param argument="--write_k_freq" type="boolean" checked="false" truevalue="--write_k_freq" falsevalue="" label="write kmers along with their frequency of occurrence and exit" />
-            <param argument="--k_dist" type="boolean" checked="false" truevalue="--k_dist" falsevalue="" label="print k-mer frequency distribution and exit" />
+            <param argument="--write_k_freq" type="boolean" checked="false" truevalue="--write_k_freq" falsevalue="" label="Write kmers along with their frequency of occurrence and exit" />
+            <param argument="--k_dist" type="boolean" checked="false" truevalue="--k_dist" falsevalue="" label="Print k-mer frequency distribution and exit" />
         </section>
-	<!--<section name="options_advanced_input" title="Advanced input options" expanded="False">
-            <param argument="\-\-read_k_freq" type="boolean" checked="false" truevalue="\-\-read_k_freq" falsevalue="" label="read a set of kmers along with their frequencies of occurrence instead of reading raw input files" />
-	</section>-->
+        <!--<section name="options_advanced_input" title="Advanced input options" expanded="False">
+            <param argument="\-\-read_k_freq" type="boolean" checked="false" truevalue="\-\-read_k_freq" falsevalue="" label="Read a set of kmers along with their frequencies of occurrence instead of reading raw input files" />
+        </section>-->
     </inputs>
     <outputs>
-        <data format="fastqsanger.gz" name="clean" from_work_dir="clone_outputs/R1.fq.gz"  label="${tool.name} on ${on_string}">
-            <filter>data_type['dt_select'] == 'single' and not options_kmer_char['k_dist']</filter>
+        <data name="clean" format="fastqsanger" label="${tool.name} on ${on_string}">
+            <filter>input_type['input_type_select'] == 'single' and not options_kmer_char['k_dist']</filter>
         </data>
-
-        <data format="fastqsanger.gz" name="clean_fwd" from_work_dir="clone_outputs/R1.1.fq.gz" label="${tool.name} on ${on_string} Forward reads">
-            <filter>data_type['dt_select'] == 'pair' and not options_kmer_char['k_dist']</filter>
+        <collection name="clean_pair" type="paired" label="${tool.name} on ${on_string}">
+            <filter>input_type['input_type_select'] == 'paired' and not options_kmer_char['k_dist']</filter>
+        </collection>
+        <data name="discarded" format="fastqsanger" label="${tool.name} on ${on_string}: discarded reads">
+            <filter>capture and input_type['input_type_select'] == 'single' and not options_kmer_char['k_dist']</filter>
         </data>
-        <data format="fastqsanger.gz" name="clean_rev" from_work_dir="clone_outputs/R2.2.fq.gz" label="${tool.name} on ${on_string} Reverse reads">
-            <filter>data_type['dt_select'] == 'pair'</filter>
-        </data>
+        <collection name="discarded_pair" type="paired" label="${tool.name} on ${on_string}: discarded reads">
+            <filter>capture and input_type['input_type_select'] == 'paired' and not options_kmer_char['k_dist']</filter>
+        </collection>
         <data format="tabular" name="kfreq" label="${tool.name} on ${on_string} kmer frequencies">
             <filter>options_kmer_char['write_k_freq']</filter>
         </data>
@@ -122,56 +118,74 @@
         </data>
     </outputs>
     <tests>
+        <!-- default output for filtering -->
         <test>
-            <conditional name="data_type">
-                <param name="dt_select" value="single" />
-                <param name="fname" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
+            <conditional name="input_type">
+                <param name="input_type_select" value="single" />
+                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
             </conditional>
+            <param name="rare" value="--rare"/>
+            <param name="abundant" value="--abundant" />
+            <param name="k_len" value="16" />
+            <assert_command>
+                <has_text text="--rare" />
+                <has_text text="--abundant" />
+                <has_text text="--k_len 16" />
+            </assert_command>
             <output name="clean" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.single.gz"/>
         </test>
         <test>
-            <conditional name="data_type">
-                <param name="dt_select" value="single" />
-                <param name="fname" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
+            <conditional name="input_type">
+                <param name="input_type_select" value="paired" />
+                <param name="fqinputs">
+                    <collection type="paired">
+                        <element name="forward" value="clonefilter/R1_0001.1.fq.gz" />
+                        <element name="reverse" value="clonefilter/R2_0001.2.fq.gz" />
+                    </collection>
+                </param>
+            </conditional>
+            <param name="capture" value="-D" />
+            <param name="normalize" value="1" />
+            <assert_command>
+                <has_text text="--normalize 1" />
+            </assert_command>
+            <output_collection name="clean_pair" type="paired">
+                <element name="forward" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz" />
+                <element name="reverse" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz" />
+            </output_collection>
+            <output_collection name="discarded_pair" type="paired">
+                <element name="forward" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz" />
+                <element name="reverse" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz" />
+            </output_collection>
+        </test>
+        <!-- kfreq output -->
+        <test>
+            <conditional name="input_type">
+                <param name="input_type_select" value="single" />
+                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
             </conditional>
             <section name="options_kmer_char">
                 <param name="write_k_freq" value="--write_k_freq" />
             </section>
             <output name="kfreq" file="kmerfilter/kfreq.tsv"/>
         </test>
-	<!--<test>
-            <conditional name="data_type">
-                <param name="dt_select" value="single" />
-                <param name="fname" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
-                <param name="barcode_encoding" value="\-\-inline_null" />
-                
-                </conditional>
-            <output name="clean" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz"/>
-        </test>
+        <!-- kfreqdist output -->
         <test>
-            <conditional name="data_type">
-                <param name="dt_select" value="pair" />
-                <param name="fwd" ftype="fastqsanger" value="clonefilter/R1_0001.1.fq.gz" />
-                <param name="rev" ftype="fastqsanger" value="clonefilter/R2_0001.2.fq.gz" />
+            <conditional name="input_type">
+                <param name="input_type_select" value="single" />
+                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
             </conditional>
-            <output name="clean_fwd" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz"/>
-            <output name="clean_rev" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz"/>
+            <section name="options_kmer_char">
+                <param name="k_dist" value="--k_dist" />
+            </section>
+            <output name="kfreqdist" file="kmerfilter/kfreqdist.tsv"/>
         </test>
-        <test>
-            <conditional name="data_type">
-                <param name="dt_select" value="pair" />
-                <param name="fwd" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
-                <param name="rev" ftype="fastqsanger.gz" value="clonefilter/R2_0001.2.fq.gz" />
-            </conditional>
-            <output name="clean_fwd" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz"/>
-            <output name="clean_rev" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz"/>
-    </test>-->
     </tests>
     <help>
 <![CDATA[
 .. class:: infomark
 
-Allows paired or single-end reads to be filtered according to the number or rare or abundant kmers they contain. Useful for both RAD datasets as well as randomly sheared genomic or transcriptomic data. 
+Allows paired or single-end reads to be filtered according to the number of rare or abundant kmers they contain. Useful for RAD datasets as well as randomly sheared genomic or transcriptomic data.
 
 @STACKS_INFOS@
 ]]>