diff stacks_kmerfilter.xml @ 2:9247decc373c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
author iuc
date Wed, 15 Jul 2020 21:14:37 +0000
parents 43b4ef96585f
children 9a119e97b272
line wrap: on
line diff
--- a/stacks_kmerfilter.xml	Mon Sep 30 14:09:45 2019 -0400
+++ b/stacks_kmerfilter.xml	Wed Jul 15 21:14:37 2020 +0000
@@ -7,7 +7,7 @@
     <expand macro="version_cmd"/>
     <command detect_errors="aggressive"><![CDATA[
 @FASTQ_INPUT_FUNCTIONS@
-
+trap ">&2 cat '$output_log'" err exit &&
 mkdir stacks_inputs stacks_outputs &&
 
 #set ($link_command, $fwd_path, $rev_path, $inputype) = $fastq_input_batch($input_type.fqinputs, $input_type.input_type_select)
@@ -39,152 +39,156 @@
     --normalize $options_normalization.normalize
 #end if
 #if $options_kmer_char.write_k_freq
-    --read_k_freq $kfreq
+    --write-k-freq $kfreqdist
 #end if
+## TODO read_k_freq
 $options_kmer_char.k_dist
 #if $options_kmer_char.k_dist
-    | sed 's/KmerFrequency/# KmerFrequency/' > $kfreqdist
+    > '$kfreq'
 #end if
 @TEE_APPEND_LOG@
-@CAT_LOG_TO_STDERR@
 
-## move outputs such that Galaxy can find them
-## if filtering is on then ...filt...fq is created
-## if normalization is on then ...norm...fq is created
-## if both are active then both files are created, but only norm is needed
-#if str($options_filtering.rare)!="" or str($options_filtering.abundant)!="" or str($options_normalization.normalize)!="":
-    #if str($options_normalization.normalize)!="":
-        #set infix="norm"
-    #else
-        #set infix="fil"
-    #end if
-    #if $capture:
+#if $options_kmer_char.k_dist 
+    && sed -i -e 's/KmerFrequency/# KmerFrequency/' '$kfreq'
+#elif $options_kmer_char.write_k_freq
+    && sed -i -e 's/# Kmer Count/#Kmer\tCount/; s/ /\t/' '$kfreqdist';
+#else
+    ## move outputs such that Galaxy can find them
+    ## if filtering is on then ...filt...fq is created
+    ## if normalization is on then ...norm...fq is created
+    ## if both are active then both files are created, but only norm is needed
+    #if str($options_filtering.rare)!="" or str($options_filtering.abundant)!="" or str($options_normalization.normalize)!="":
+        #if str($options_normalization.normalize)!="":
+            #set infix="norm"
+        #else
+            #set infix="fil"
+        #end if
+        #if $capture:
+            #if $input_type.input_type_select == "single"
+                && mv stacks_outputs/*.discards.fastq '$discarded'
+            #else
+                && mv stacks_outputs/*.1.discards.fastq '$discarded_pair.forward'
+                && mv stacks_outputs/*.2.discards.fastq '$discarded_pair.reverse'
+            #end if
+        #end if
         #if $input_type.input_type_select == "single"
-            && mv stacks_outputs/*.discards.fastq '$discarded'
+            && mv stacks_outputs/*.${infix}.fastq '$clean'
         #else
-            && mv stacks_outputs/*.1.discards.fastq '$discarded_pair.forward'
-            && mv stacks_outputs/*.2.discards.fastq '$discarded_pair.reverse'
+            && mv stacks_outputs/*.1.${infix}.fastq '$clean_pair.forward'
+            && mv stacks_outputs/*.2.${infix}.fastq '$clean_pair.reverse'
         #end if
     #end if
-    #if $input_type.input_type_select == "single"
-        && mv stacks_outputs/*.${infix}.fastq '$clean'
-    #else
-        && mv stacks_outputs/*.1.${infix}.fastq '$clean_pair.forward'
-        && mv stacks_outputs/*.2.${infix}.fastq '$clean_pair.reverse'
-    #end if
 #end if
-
     ]]></command>
     <inputs>
         <expand macro="fastq_input_bc"/>
-        <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file" />
+        <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file"/>
         <section name="options_filtering" title="Filtering options" expanded="False">
-            <param argument="--rare" type="boolean" checked="false" truevalue="--rare" falsevalue="" label="Turn on filtering based on rare k-mers" />
-            <param argument="--abundant" type="boolean" checked="false" truevalue="--abundant" falsevalue="" label="Turn on filtering based on abundant k-mers" />
-            <param argument="--k_len" type="integer" value="15" label="K-mer size" />
+            <param argument="--rare" type="boolean" checked="false" truevalue="--rare" falsevalue="" label="Turn on filtering based on rare k-mers"/>
+            <param argument="--abundant" type="boolean" checked="false" truevalue="--abundant" falsevalue="" label="Turn on filtering based on abundant k-mers"/>
+            <param argument="--k_len" type="integer" value="15" label="K-mer size"/>
         </section>
         <section name="options_advanced_filtering" title="Advanced fitering options" expanded="False">
-            <param argument="--max_k_freq" type="integer" value="20000" label="Number of times a kmer must occur to be considered abundant" />
-            <param argument="--min_lim" type="integer" value="" optional="true" label="Number of rare kmers occuring in a row required to discard a read" help="(default: 80% of the k-mer length)." />
-            <param argument="--max_lim" type="integer" value="" optional="true" label="Number of abundant kmers required to discard a read" help="(default: 80% of the k-mers in a read)" />
+            <param argument="--max_k_freq" type="integer" value="20000" label="Number of times a kmer must occur to be considered abundant"/>
+            <param argument="--min_lim" type="integer" value="" optional="true" label="Number of rare kmers occuring in a row required to discard a read" help="(default: 80% of the k-mer length)."/>
+            <param argument="--max_lim" type="integer" value="" optional="true" label="Number of abundant kmers required to discard a read" help="(default: 80% of the k-mers in a read)"/>
         </section>
         <section name="options_normalization" title="Normalization options" expanded="False">
-            <param argument="--normalize" type="integer" value="" optional="true" label="Normalize read depth according to k-mer coverage" />
+            <param argument="--normalize" type="integer" value="" optional="true" label="Normalize read depth according to k-mer coverage"/>
         </section>
         <section name="options_kmer_char" title="Characterizing K-mers options" expanded="False">
-            <param argument="--write_k_freq" type="boolean" checked="false" truevalue="--write_k_freq" falsevalue="" label="Write kmers along with their frequency of occurrence and exit" />
-            <param argument="--k_dist" type="boolean" checked="false" truevalue="--k_dist" falsevalue="" label="Print k-mer frequency distribution and exit" />
+            <param argument="--write_k_freq" type="boolean" checked="false" truevalue="--write_k_freq" falsevalue="" label="Write kmers along with their frequency of occurrence and exit"/>
+            <param argument="--k_dist" type="boolean" checked="false" truevalue="--k_dist" falsevalue="" label="Print k-mer frequency distribution and exit"/>
         </section>
         <!--<section name="options_advanced_input" title="Advanced input options" expanded="False">
-            <param argument="\-\-read_k_freq" type="boolean" checked="false" truevalue="\-\-read_k_freq" falsevalue="" label="Read a set of kmers along with their frequencies of occurrence instead of reading raw input files" />
+            <param argument="\-\-read_k_freq" type="boolean" checked="false" truevalue="\-\-read_k_freq" falsevalue="" label="Read a set of kmers along with their frequencies of occurrence instead of reading raw input files"/>
         </section>-->
         <expand macro="in_log"/>
     </inputs>
     <outputs>
         <expand macro="out_log"/>
-        <data name="clean" format="fastqsanger" label="${tool.name} on ${on_string}">
-            <filter>input_type['input_type_select'] == 'single' and not options_kmer_char['k_dist']</filter>
-        </data>
-        <collection name="clean_pair" type="paired" label="${tool.name} on ${on_string}">
-            <filter>input_type['input_type_select'] == 'paired' and not options_kmer_char['k_dist']</filter>
-        </collection>
-        <data name="discarded" format="fastqsanger" label="${tool.name} on ${on_string}: discarded reads">
-            <filter>capture and input_type['input_type_select'] == 'single' and not options_kmer_char['k_dist']</filter>
-        </data>
-        <collection name="discarded_pair" type="paired" label="${tool.name} on ${on_string}: discarded reads">
-            <filter>capture and input_type['input_type_select'] == 'paired' and not options_kmer_char['k_dist']</filter>
-        </collection>
+        <expand macro="fastq_output_filter">
+            <filter>not options_kmer_char['k_dist'] and not options_kmer_char['write_k_freq']</filter>
+        </expand>
         <data format="tabular" name="kfreq" label="${tool.name} on ${on_string} kmer frequencies">
-            <filter>options_kmer_char['write_k_freq']</filter>
+            <filter>options_kmer_char['k_dist']</filter>
         </data>
         <data format="tabular" name="kfreqdist" label="${tool.name} on ${on_string} kmer frequency distribution">
-            <filter>options_kmer_char['k_dist']</filter>
+            <filter>options_kmer_char['write_k_freq']</filter>
         </data>
     </outputs>
     <tests>
         <!-- default output for filtering -->
-        <test>
+        <test expect_num_outputs="2">
             <conditional name="input_type">
-                <param name="input_type_select" value="single" />
-                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
+                <param name="input_type_select" value="single"/>
+                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz"/>
             </conditional>
-            <param name="add_log" value="yes" />
+            <param name="add_log" value="yes"/>
             <output name="output_log" ftype="txt" file="kmerfilter/kmerfilter.log" lines_diff="8"/>
             <param name="rare" value="--rare"/>
-            <param name="abundant" value="--abundant" />
-            <param name="k_len" value="16" />
+            <param name="abundant" value="--abundant"/>
+            <param name="k_len" value="16"/>
             <assert_command>
-                <has_text text="--rare" />
-                <has_text text="--abundant" />
-                <has_text text="--k_len 16" />
+                <has_text text="--rare"/>
+                <has_text text="--abundant"/>
+                <has_text text="--k_len 16"/>
             </assert_command>
-            <output name="clean" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.single.gz"/>
+            <param name="add_log" value="yes"/>
+            <output name="output_log"><assert_contents><has_text text="5 retained reads."/></assert_contents></output>
+            <output name="clean" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="kmerfilter/Removed1_0001.1.1.fq.single.gz"/>
         </test>
-        <test>
+        <test expect_num_outputs="7">
             <conditional name="input_type">
-                <param name="input_type_select" value="paired" />
+                <param name="input_type_select" value="paired"/>
                 <param name="fqinputs">
                     <collection type="paired">
-                        <element name="forward" value="clonefilter/R1_0001.1.fq.gz" />
-                        <element name="reverse" value="clonefilter/R2_0001.2.fq.gz" />
+                        <element name="forward" value="clonefilter/R1_0001.1.fq.gz"/>
+                        <element name="reverse" value="clonefilter/R2_0001.2.fq.gz"/>
                     </collection>
                 </param>
             </conditional>
-            <param name="capture" value="-D" />
-            <param name="normalize" value="1" />
+            <param name="capture" value="-D"/>
+            <param name="normalize" value="1"/>
             <assert_command>
-                <has_text text="--normalize 1" />
+                <has_text text="--normalize 1"/>
             </assert_command>
+            <param name="add_log" value="yes"/>
+            <output name="output_log"><assert_contents><has_text text="8 retained reads."/></assert_contents></output>
             <output_collection name="clean_pair" type="paired">
-                <element name="forward" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz" />
-                <element name="reverse" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz" />
+                <element name="forward" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="kmerfilter/Removed1_0001.1.1.fq.gz"/>
+                <element name="reverse" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="kmerfilter/Removed2_0001.2.2.fq.gz"/>
             </output_collection>
             <output_collection name="discarded_pair" type="paired">
-                <element name="forward" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz" />
-                <element name="reverse" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz" />
+                <element name="forward" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="kmerfilter/Discarded1_0001.1.1.fq.gz"/>
+                <element name="reverse" compare="sim_size" delta_frac="0.01" ftype="fastqsanger.gz" file="kmerfilter/Discarded2_0001.2.2.fq.gz"/>
             </output_collection>
         </test>
         <!-- kfreq output -->
-        <test>
+        <test expect_num_outputs="2">
             <conditional name="input_type">
-                <param name="input_type_select" value="single" />
-                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
+                <param name="input_type_select" value="single"/>
+                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz"/>
             </conditional>
             <section name="options_kmer_char">
-                <param name="write_k_freq" value="--write_k_freq" />
+                <param name="write_k_freq" value="--write_k_freq"/>
             </section>
-            <output name="kfreq" file="kmerfilter/kfreq.tsv"/>
+            <param name="add_log" value="yes"/>
+            <output name="output_log"><assert_contents><has_text text="done."/></assert_contents></output>
+            <output name="kfreqdist" file="kmerfilter/kfreqdist.tsv"/>
         </test>
         <!-- kfreqdist output -->
-        <test>
+        <test expect_num_outputs="1">
             <conditional name="input_type">
-                <param name="input_type_select" value="single" />
-                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
+                <param name="input_type_select" value="single"/>
+                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz"/>
             </conditional>
             <section name="options_kmer_char">
-                <param name="k_dist" value="--k_dist" />
+                <param name="k_dist" value="--k_dist"/>
             </section>
-            <output name="kfreqdist" file="kmerfilter/kfreqdist.tsv"/>
+            <param name="add_log" value="no"/>
+            <assert_stderr><has_text text="Generating kmer distribution..."/></assert_stderr>
+            <output name="kfreq" file="kmerfilter/kfreq.tsv"/>
         </test>
     </tests>
     <help>
@@ -196,5 +200,5 @@
 @STACKS_INFOS@
 ]]>
     </help>
-    <expand macro="citation" />
+    <expand macro="citation"/>
 </tool>