khmer: normalize-by-median.xml comparison

comparison normalize-by-median.xml @ 0:0187f18785a3 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit 37727831a2630b7a7d4fb033366cbd772c3086c8

author	iuc
date	Sat, 17 Oct 2015 04:02:33 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:0187f18785a3
+<tool id="gedlab-khmer-normalize-by-median" name="Normalize By Median" version="@WRAPPER_VERSION@-4">
+<description>Filters a fastq/fasta file using digital normalization via median k-mer abundances</description>
+<!-- Tool-local token plus the shared definitions imported from macros.xml. -->
+<macros>
+    <token name="@BINARY@">normalize-by-median.py</token>
+    <import>macros.xml</import>
+</macros>
+<!-- Sections expanded from the imported macros; their bodies live in macros.xml, not in this file. -->
+<expand macro="requirements"/>
+<expand macro="stdio"/>
+<expand macro="version"/>
+<!-- Builds the shell command for the job.
+     1. Each selected input dataset is symlinked into the job directory as sequence-N.
+     2. The script is started from inside ./output, the directory that the
+        "sequences" collection scans for result files.
+     3. The report and the optional countgraph are written to the paths Galaxy assigns.
+     Dataset paths are single-quoted so a path containing spaces or shell
+     metacharacters cannot be split into several words. The executable name comes
+     from the @BINARY@ token declared in this file's macros section. -->
+<command><![CDATA[
+set -xu &&
+#for $num, $input in enumerate($inputs)
+ln -s '${input}' sequence-${num} &&
+#end for
+mkdir output &&
+cd output &&
+@BINARY@
+${paired_switch}
+${force_single_switch}
+@TABLEPARAMS@
+--cutoff=${cutoff}
+#if $unpaired_reads_filename
+--unpaired-reads='${unpaired_reads_filename}'
+#end if
+#if $save_countgraph
+--savegraph='${countgraph}'
+#end if
+#if $countgraph_to_load
+--loadgraph='${countgraph_to_load}'
+#end if
+--report='${report}'
+../sequence-*
+]]>
+</command>
+<!-- User-facing parameters. The sequence inputs and the table-size options come
+     from shared macros; the remaining parameters map one-to-one onto the command
+     line options named in parentheses in each help text. -->
+<inputs>
+<expand macro="input_sequences_filenames" />
+<!-- Boolean switches: when checked, the truevalue string is emitted verbatim into the command. -->
+<param name="paired_switch" type="boolean" checked="false" truevalue="--paired" falsevalue=""
+label="Require all sequences be properly paired?"
+help="(--paired) The tool will fail if given improperly paired reads and this option is selected." />
+<param name="force_single_switch" type="boolean" checked="false" truevalue="--force_single" falsevalue=""
+label="Ignore all pairing information?"
+help="(--force_single) By default this tool processes reads in a pair-aware manner. This option disables that behavior." />
+<!-- Optional datasets: the matching option is only added to the command when a dataset is supplied. -->
+<param name="unpaired_reads_filename" type="data" format="fasta,fastq,fastqsanger,fastqsolexa,fastqillumina" optional="true"
+label="Extra unpaired reads"
+help="(--unpaired-reads) If all but one of your sequence files are interleaved paired end reads you can include one unpaired file to be processed last without regard to pairing." />
+<param name="countgraph_to_load" type="data" format="oxlicg" optional="true"
+label="Optional k-mer countgraph"
+help="(--loadgraph) The inputs file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." />
+<!-- Also controls whether the "countgraph" output dataset is created. -->
+<param name="save_countgraph" type="boolean" label="Save the k-mer countgraph(s) in a file" help="(--savegraph)" />
+<param name="cutoff" type="integer" min="1" value="20" label="Cutoff" help="(--cutoff)" />
+<expand macro="tableinputs" />
+</inputs>
+<outputs>
+    <!-- Created only when the save_countgraph checkbox is ticked. -->
+    <data name="countgraph" format="oxlicg" label="${tool.name} k-mer countgraph">
+        <filter>save_countgraph == True</filter>
+    </data>
+    <data name="report" format="txt" label="${tool.name} report"/>
+    <!-- Files found in the job's output/ directory are collected into a list,
+         each element taking its name from the file. -->
+    <collection name="sequences" type="list">
+        <discover_datasets directory="output" pattern="__name__"/>
+    </collection>
+</outputs>
+<!-- Functional tests. Each one runs the tool on a small FASTA file with ksize 17,
+     compares the report against a stored file and checks the kept-reads element
+     for expected sequence text. -->
+<tests>
+<!-- Single-end input, cutoff 1. -->
+<test>
+<param name="inputs" value="test-abund-read-2.fa"/>
+<param name="type" value="specific" />
+<param name="cutoff" value="1" />
+<param name="ksize" value="17" />
+<output name="report" file="normalize-by-median.report.txt" />
+<output_collection name="sequences" type="list">
+<element name="sequence-0.keep">
+<assert_contents>
+<has_text text="GGTTGACGGGGCTCAGGGGG" />
+</assert_contents>
+</element>
+</output_collection>
+</test>
+<!-- Same input, cutoff 2. -->
+<test>
+<param name="inputs" value="test-abund-read-2.fa" />
+<param name="type" value="specific" />
+<param name="cutoff" value="2" />
+<param name="ksize" value="17" />
+<output name="report" file="normalize-by-median.c2.report.txt" />
+<output_collection name="sequences" type="list">
+<element name="sequence-0.keep">
+<assert_contents>
+<has_text text="GGTTGACGGGGCTCAGGGGG" />
+<has_text text="GGTTGACGGGGCTCAGGG" />
+</assert_contents>
+</element>
+</output_collection>
+</test>
+<!-- Paired input with the pairing requirement switched on. The parameter must be
+     addressed as paired_switch, the name declared in the inputs section; the
+     former name "paired" matched no input, so the switch was never set.
+     NOTE(review): confirm normalize-by-median.paired.report.txt was produced
+     with the paired option enabled, and regenerate it if not. -->
+<test>
+<param name="inputs" value="test-abund-read-paired.fa" />
+<param name="type" value="specific" />
+<param name="cutoff" value="1" />
+<param name="ksize" value="17" />
+<param name="paired_switch" value="true" />
+<output name="report" file="normalize-by-median.paired.report.txt" />
+<output_collection name="sequences" type="list">
+<element name="sequence-0.keep">
+<assert_contents>
+<has_text text="GGTTGACGGGGCTCAGGGGG" />
+<has_text text="GGTTGACGGGGCTCAGGG" />
+</assert_contents>
+</element>
+</output_collection>
+</test>
+</tests>
+<help><![CDATA[
+Do digital normalization (remove mostly redundant sequences)
+Discard sequences based on whether or not their median k-mer abundance lies
+above a specified cutoff. Kept sequences will be placed in <fileN>.keep.
+By default, paired-end reads will be considered together; if either read will
+be kept, then both will be kept. (This keeps both reads from a fragment, and
+helps with retention of repeats.) Unpaired reads are treated individually.
+If `--paired` is set then proper pairing is required and the tool will exit on
+unpaired reads, although `--unpaired-reads` can be used to supply a file of
+orphan reads to be read after the paired reads.
+`--force_single` will ignore all pairing information and treat reads
+individually.
+With `-s`/`--savegraph`, the k-mer countgraph will be saved to the specified
+file after all sequences have been processed. `--loadgraph` will load the
+specified k-mer countgraph before processing the specified files.  Note
+that the countgraph is in the same format as those produced by
+`load-into-counting.py` and consumed by `abundance-dist.py`.
+@HELP_FOOTER@
+]]>
+</help>
+<!-- Citation entries are supplied by macros defined outside this file. -->
+<citations>
+    <expand macro="software-citation"/>
+    <expand macro="diginorm-citation"/>
+</citations>
+</tool>

Mercurial > repos > iuc > khmer

comparison normalize-by-median.xml @ 0:0187f18785a3 draft default tip