Mercurial > repos > matthias > stacks2_kmerfilter
diff stacks_kmerfilter.xml @ 5:2277b0b50371 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit 4e87a14a5479800df9675c1cbcdbe1b11f63653b-dirty
author | matthias |
---|---|
date | Wed, 27 Feb 2019 09:50:00 -0500 |
parents | 7aec316566cb |
children | e0b1e96c4032 |
line wrap: on
line diff
--- a/stacks_kmerfilter.xml Fri Jan 04 03:30:16 2019 -0500
+++ b/stacks_kmerfilter.xml Wed Feb 27 09:50:00 2019 -0500
@@ -1,49 +1,30 @@
-<tool id="stacks2_kmerfilter" name="Stacks2: kmer filter" version="@STACKS_VERSION@+galaxy@WRAPPER_VERSION@">
+<tool id="stacks2_kmerfilter" name="Stacks2: kmer filter" profile="@PROFILE@" version="@STACKS_VERSION@+galaxy@WRAPPER_VERSION@">
     <description>Identify PCR clones</description>
     <macros>
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements"/>
-    <expand macro="stdio"/>
     <expand macro="version_cmd"/>
-    <command><![CDATA[
-    #if $data_type.dt_select == "single"
-        #if $data_type.fname.is_of_type('fastqsanger')
-            #set $ext = ".fq"
-            #set inputype = "fastq"
-        #else
-            #set $ext = ".fq.gz"
-            #set inputype = "gzfastq"
-        #end if
-        ln -s '$data_type.fname' R1$ext &&
-    #elif $data_type.dt_select == "pair"
-        #if $data_type.fwd.is_of_type('fastqsanger')
-            #set $ext = ".fq"
-            #set inputype = "fastq"
-        #else
-            #set $ext = ".fq.gz"
-            #set inputype = "gzfastq"
-        #end if
-        ln -s '$data_type.fwd' R1$ext &&
-        ln -s '$data_type.rev' R2$ext &&
-    #end if
+    <command detect_errors="aggressive"><![CDATA[
+@FASTQ_INPUT_FUNCTIONS@
+
+mkdir stacks_inputs stacks_outputs &&
+
+#set ($link_command, $fwd_path, $rev_path, $inputype) = $fastq_input_batch($input_type.fqinputs, $input_type.input_type_select)
+$link_command
 
-    mkdir clone_outputs
-
-    &&
-
-    kmer_filter
-    #if $data_type.dt_select == 'single':
-        -f R1$ext
-    #else
-        -1 R1$ext
-        -2 R2$ext
-    #end if
+kmer_filter
+#if $input_type.input_type_select == 'single':
+    -f '$fwd_path'
+#else
+    -1 '$fwd_path'
+    -2 '$rev_path'
+#end if
     ## TODO $options_kmer_char.read_k_freq
-    -i $inputype
-    -o clone_outputs
-    $capture
-    -y gzfastq
+-i $inputype
+-o stacks_outputs
+$capture
+-y fastq
     $options_filtering.rare
     $options_filtering.abundant
     --k_len $options_filtering.k_len
@@ -62,58 +43,73 @@
     #end if
     $options_kmer_char.k_dist
     #if $options_kmer_char.k_dist
-        | sed 's/KmerFrequency/# KmerFrequency/' $kfreqdist>
+        | sed 's/KmerFrequency/# KmerFrequency/' > $kfreqdist
     #end if
 
+## move outputs such that Galaxy can find them
+## if filtering is on then ...filt...fq is created
+## if normalization is on then ...norm...fq is created
+## if both are active then both files are created, but only norm is needed
+#if str($options_filtering.rare)!="" or str($options_filtering.abundant)!="" or str($options_normalization.normalize)!="":
+    #if str($options_normalization.normalize)!="":
+        #set infix="norm"
+    #else
+        #set infix="fil"
+    #end if
+    #if $capture:
+        #if $input_type.input_type_select == "single"
+            && mv stacks_outputs/*.discards.fastq '$discarded'
+        #else
+            && mv stacks_outputs/*.1.discards.fastq '$discarded_pair.forward'
+            && mv stacks_outputs/*.2.discards.fastq '$discarded_pair.reverse'
+        #end if
+    #end if
+    #if $input_type.input_type_select == "single"
+        && mv stacks_outputs/*.${infix}.fastq '$clean'
+    #else
+        && mv stacks_outputs/*.1.${infix}.fastq '$clean_pair.forward'
+        && mv stacks_outputs/*.2.${infix}.fastq '$clean_pair.reverse'
+    #end if
+#end if
     ]]></command>
 
     <inputs>
-        <conditional name="data_type">
-            <param name="dt_select" type="select" label="Single or Paired-end">
-                <option value="single">Single</option>
-                <option value="pair">Pair</option>
-            </param>
-            <when value="single">
-                <param name="fname" type="data" format="fastqsanger,fastqsanger.gz" label="FASTQ" />
-            </when>
-            <when value="pair">
-                <param name="fwd" type="data" format="fastqsanger,fastqsanger.gz" label="Forward FASTQ" />
-                <param name="rev" type="data" format="fastqsanger,fastqsanger.gz" label="Reverse FASTQ" />
-            </when>
-        </conditional>
+        <expand macro="fastq_input_bc"/>
         <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file" />
         <section name="options_filtering" title="Filtering options" expanded="False">
-            <param argument="--rare" type="boolean" checked="false" truevalue="--rare" falsevalue="" label="turn on filtering based on rare k-mers" />
-            <param argument="--abundant" type="boolean" checked="false" truevalue="--abundant" falsevalue="" label="turn on filtering based on abundant k-mers" />
-            <param argument="--k_len" type="integer" value="15" label="k-mer size" />
+            <param argument="--rare" type="boolean" checked="false" truevalue="--rare" falsevalue="" label="Turn on filtering based on rare k-mers" />
+            <param argument="--abundant" type="boolean" checked="false" truevalue="--abundant" falsevalue="" label="Turn on filtering based on abundant k-mers" />
+            <param argument="--k_len" type="integer" value="15" label="K-mer size" />
         </section>
         <section name="options_advanced_filtering" title="Advanced fitering options" expanded="False">
-            <param argument="--max_k_freq" type="integer" value="20000" label="number of times a kmer must occur to be considered abundant" />
-            <param argument="--min_lim" type="integer" value="" optional="true" label="number of rare kmers occuring in a row required to discard a read" help="(default: 80% of the k-mer length)." />
-            <param argument="--max_lim" type="integer" value="" optional="true" label="number of abundant kmers required to discard a read" help="(default: 80% of the k-mers in a read)" />
+            <param argument="--max_k_freq" type="integer" value="20000" label="Number of times a kmer must occur to be considered abundant" />
+            <param argument="--min_lim" type="integer" value="" optional="true" label="Number of rare kmers occuring in a row required to discard a read" help="(default: 80% of the k-mer length)." />
+            <param argument="--max_lim" type="integer" value="" optional="true" label="Number of abundant kmers required to discard a read" help="(default: 80% of the k-mers in a read)" />
         </section>
         <section name="options_normalization" title="Normalization options" expanded="False">
-            <param argument="--normalize" type="integer" value="" optional="true" label="normalize read depth according to k-mer coverage" />
+            <param argument="--normalize" type="integer" value="" optional="true" label="Normalize read depth according to k-mer coverage" />
         </section>
         <section name="options_kmer_char" title="Characterizing K-mers options" expanded="False">
-            <param argument="--write_k_freq" type="boolean" checked="false" truevalue="--write_k_freq" falsevalue="" label="write kmers along with their frequency of occurrence and exit" />
-            <param argument="--k_dist" type="boolean" checked="false" truevalue="--k_dist" falsevalue="" label="print k-mer frequency distribution and exit" />
+            <param argument="--write_k_freq" type="boolean" checked="false" truevalue="--write_k_freq" falsevalue="" label="Write kmers along with their frequency of occurrence and exit" />
+            <param argument="--k_dist" type="boolean" checked="false" truevalue="--k_dist" falsevalue="" label="Print k-mer frequency distribution and exit" />
         </section>
-    <!--<section name="options_advanced_input" title="Advanced input options" expanded="False">
-        <param argument="\-\-read_k_freq" type="boolean" checked="false" truevalue="\-\-read_k_freq" falsevalue="" label="read a set of kmers along with their frequencies of occurrence instead of reading raw input files" />
-    </section>-->
+        <!--<section name="options_advanced_input" title="Advanced input options" expanded="False">
+            <param argument="\-\-read_k_freq" type="boolean" checked="false" truevalue="\-\-read_k_freq" falsevalue="" label="Read a set of kmers along with their frequencies of occurrence instead of reading raw input files" />
+        </section>-->
     </inputs>
     <outputs>
-        <data format="fastqsanger.gz" name="clean" from_work_dir="clone_outputs/R1.fq.gz" label="${tool.name} on ${on_string}">
-            <filter>data_type['dt_select'] == 'single' and not options_kmer_char['k_dist']</filter>
+        <data name="clean" format="fastqsanger" label="${tool.name} on ${on_string}">
+            <filter>input_type['input_type_select'] == 'single' and not options_kmer_char['k_dist']</filter>
         </data>
-
-        <data format="fastqsanger.gz" name="clean_fwd" from_work_dir="clone_outputs/R1.1.fq.gz" label="${tool.name} on ${on_string} Forward reads">
-            <filter>data_type['dt_select'] == 'pair' and not options_kmer_char['k_dist']</filter>
+        <collection name="clean_pair" type="paired" label="${tool.name} on ${on_string}">
+            <filter>input_type['input_type_select'] == 'paired' and not options_kmer_char['k_dist']</filter>
+        </collection>
+        <data name="discarded" format="fastqsanger" label="${tool.name} on ${on_string}: discarded reads">
+            <filter>capture and input_type['input_type_select'] == 'single' and not options_kmer_char['k_dist']</filter>
         </data>
-        <data format="fastqsanger.gz" name="clean_rev" from_work_dir="clone_outputs/R2.2.fq.gz" label="${tool.name} on ${on_string} Reverse reads">
-            <filter>data_type['dt_select'] == 'pair'</filter>
-        </data>
+        <collection name="discarded_pair" type="paired" label="${tool.name} on ${on_string}: discarded reads">
+            <filter>capture and input_type['input_type_select'] == 'paired' and not options_kmer_char['k_dist']</filter>
+        </collection>
         <data format="tabular" name="kfreq" label="${tool.name} on ${on_string} kmer frequencies">
             <filter>options_kmer_char['write_k_freq']</filter>
         </data>
@@ -122,56 +118,74 @@
         </data>
     </outputs>
     <tests>
+        <!-- default output for filtering -->
         <test>
-            <conditional name="data_type">
-                <param name="dt_select" value="single" />
-                <param name="fname" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
+            <conditional name="input_type">
+                <param name="input_type_select" value="single" />
+                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
             </conditional>
+            <param name="rare" value="--rare"/>
+            <param name="abundant" value="--abundant" />
+            <param name="k_len" value="16" />
+            <assert_command>
+                <has_text text="--rare" />
+                <has_text text="--abundant" />
+                <has_text text="--k_len 16" />
+            </assert_command>
             <output name="clean" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.single.gz"/>
         </test>
         <test>
-            <conditional name="data_type">
-                <param name="dt_select" value="single" />
-                <param name="fname" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
+            <conditional name="input_type">
+                <param name="input_type_select" value="paired" />
+                <param name="fqinputs">
+                    <collection type="paired">
+                        <element name="forward" value="clonefilter/R1_0001.1.fq.gz" />
+                        <element name="reverse" value="clonefilter/R2_0001.2.fq.gz" />
+                    </collection>
+                </param>
+            </conditional>
+            <param name="capture" value="-D" />
+            <param name="normalize" value="1" />
+            <assert_command>
+                <has_text text="--normalize 1" />
+            </assert_command>
+            <output_collection name="clean_pair" type="paired">
+                <element name="forward" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz" />
+                <element name="reverse" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz" />
+            </output_collection>
+            <output_collection name="discarded_pair" type="paired">
+                <element name="forward" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz" />
+                <element name="reverse" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz" />
+            </output_collection>
+        </test>
+        <!-- kfreq output -->
+        <test>
+            <conditional name="input_type">
+                <param name="input_type_select" value="single" />
+                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
             </conditional>
             <section name="options_kmer_char">
                 <param name="write_k_freq" value="--write_k_freq" />
             </section>
             <output name="kfreq" file="kmerfilter/kfreq.tsv"/>
         </test>
-        <!--<test>
-            <conditional name="data_type">
-                <param name="dt_select" value="single" />
-                <param name="fname" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
-                <param name="barcode_encoding" value="\-\-inline_null" />
-
-            </conditional>
-            <output name="clean" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz"/>
-        </test>
+        <!-- kfreqdist output -->
         <test>
-            <conditional name="data_type">
-                <param name="dt_select" value="pair" />
-                <param name="fwd" ftype="fastqsanger" value="clonefilter/R1_0001.1.fq.gz" />
-                <param name="rev" ftype="fastqsanger" value="clonefilter/R2_0001.2.fq.gz" />
+            <conditional name="input_type">
+                <param name="input_type_select" value="single" />
+                <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
             </conditional>
-            <output name="clean_fwd" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz"/>
-            <output name="clean_rev" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz"/>
+            <section name="options_kmer_char">
+                <param name="k_dist" value="--k_dist" />
+            </section>
+            <output name="kfreqdist" file="kmerfilter/kfreqdist.tsv"/>
         </test>
-        <test>
-            <conditional name="data_type">
-                <param name="dt_select" value="pair" />
-                <param name="fwd" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
-                <param name="rev" ftype="fastqsanger.gz" value="clonefilter/R2_0001.2.fq.gz" />
-            </conditional>
-            <output name="clean_fwd" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz"/>
-            <output name="clean_rev" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz"/>
-        </test>-->
     </tests>
     <help>
 <![CDATA[
 .. class:: infomark
 
-Allows paired or single-end reads to be filtered according to the number or rare or abundant kmers they contain. Useful for both RAD datasets as well as randomly sheared genomic or transcriptomic data.
+Allows paired or single-end reads to be filtered according to the number or rare or abundant kmers they contain. Useful for both RAD datasets as well as randomly sheared genomic or transcriptomic data.
 
 @STACKS_INFOS@
 ]]>