Mercurial > repos > iuc > khmer_normalize_by_median

--- a/macros.xml	Tue Aug 31 09:12:02 2021 +0000
+++ b/macros.xml	Thu Oct 03 13:45:11 2024 +0000
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">3.0.0a3</token>
-    <token name="@VERSION_SUFFIX@">2</token>
+    <token name="@VERSION_SUFFIX@">3</token>
     <token name="@PROFILE@">20.01</token>
     <xml name="bio_tools">
         <xrefs>
@@ -34,7 +34,7 @@
 #end if
 ]]>
     </token>
-    <token name="@THREADS@">--threads \${GALAXY_SLOTS:-4}</token>
+    <token name="@THREADS@">--threads "\${GALAXY_SLOTS:-4}"</token>
     <xml name="tableinputs">
         <conditional name="parameters">
             <param name="type" type="select" label="Advanced Parameters"
@@ -43,7 +43,7 @@
                 <option value="specific">Show</option>
             </param>
             <when value="simple">
-                <param argument="" name="tablesize" type="select" label="Sample Type" display="radio">
+                <param argument="--max-tablesize" name="tablesize" type="select" label="Sample Type" display="radio">
                     <option value="1e9" selected="true">Microbial Genome</option>
                     <option value="2e9">Animal Transcriptome</option>
                     <option value="4e9">Small Animal Genome or Low-Diversity Metagenome</option>
@@ -51,8 +51,8 @@
                 </param>
             </when>
             <when value="specific">
-                <param argument="--ksize" name="ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" />
-                <param argument="--n_tables" name="n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" />
+                <param argument="--ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" />
+                <param argument="--n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" />
                 <param argument="--max-tablesize" name="tablesize_specific" type="text" value="1000000.0"
                     label="tablesize" help="(--max-tablesize) upper bound on the tablesize to use" />
             </when>
@@ -90,11 +90,13 @@
             help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer countgraph." />
     </xml>
     <xml name="abundance-histogram-output">
-        <data name="output_histogram_filename" format="txt"
-            label="${tool.name} on ${on_string}: k-mer abundance histogram. The
-                columns are: (1) k-mer abundance, (2) k-mer count, (3)
-                cumulative count, (4) fraction of total distinct k-mers." />
+        <data name="output_histogram_filename" format="csv"
+            label="${tool.name} on ${on_string}: k-mer abundance histogram" />
     </xml>
+    <token name="@ABUNDANCE_HISTOGRAM_OUTPUT_HELP@"><![CDATA[
+ The columns  of the k-mer abundance histogram are: (1) k-mer abundance, (2) k-mer count, (3) cumulative count, (4) fraction of total distinct k-mers.
+]]></token>
+
     <xml name="output_sequences" token_extension="">
         <collection name="sequences" type="list">
             <discover_datasets pattern="(?P&lt;name&gt;.*)\.(?P&lt;ext&gt;fast[aq](\.gz)?)\.@EXTENSION@" directory="output" />
--- a/macros.xml.orig	Tue Aug 31 09:12:02 2021 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,141 +0,0 @@
-<macros>
-<<<<<<< HEAD
-    <token name="@TOOL_VERSION@">3.0.0a3</token>
-    <token name="@VERSION_SUFFIX@">2</token>
-    <token name="@PROFILE@">20.01</token>
-=======
-    <token name="@WRAPPER_VERSION@">3.0.0a3</token>
-    <token name="@TOOL_VERSION@">+galaxy1</token>
-    <xml name="bio_tools">
-        <xrefs>
-            <xref type='bio.tools'>khmer</xref>
-        </xrefs>
-    </xml>
->>>>>>> c37d72558 (add more bio.tool IDs)
-    <xml name="requirements">
-        <requirements>
-            <requirement type="package" version="@TOOL_VERSION@">khmer</requirement>
-            <yield/>
-        </requirements>
-    </xml>
-    <xml name="version">
-        <version_command><![CDATA[@BINARY@ --version 2>&1 | tail -n 1 | cut -d ' ' -f 2]]></version_command>
-    </xml>
-    <xml name="stdio">
-        <stdio>
-            <exit_code range="1:" level="fatal" />
-        </stdio>
-    </xml>
-    <token name="@TABLEPARAMS@">
-<![CDATA[
-#if $parameters.type == "simple"
-  --ksize=20
-  --n_tables=4
-  --max-tablesize=$parameters.tablesize
-#else
-  --ksize=$parameters.ksize
-  --n_tables=$parameters.n_tables
-  --max-tablesize="$parameters.tablesize_specific"
-#end if
-]]>
-    </token>
-    <token name="@THREADS@">--threads \${GALAXY_SLOTS:-4}</token>
-    <xml name="tableinputs">
-        <conditional name="parameters">
-            <param name="type" type="select" label="Advanced Parameters"
-                help="ksize, n_tables, a specific tablesize" >
-                <option value="simple" selected="true">Hide</option>
-                <option value="specific">Show</option>
-            </param>
-            <when value="simple">
-                <param argument="" name="tablesize" type="select" label="Sample Type" display="radio">
-                    <option value="1e9" selected="true">Microbial Genome</option>
-                    <option value="2e9">Animal Transcriptome</option>
-                    <option value="4e9">Small Animal Genome or Low-Diversity Metagenome</option>
-                    <option value="16e9">Large Animal Genome</option>
-                </param>
-            </when>
-            <when value="specific">
-                <param argument="--ksize" name="ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" />
-                <param argument="--n_tables" name="n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" />
-                <param argument="--max-tablesize" name="tablesize_specific" type="text" value="1000000.0"
-                    label="tablesize" help="(--max-tablesize) upper bound on the tablesize to use" />
-            </when>
-        </conditional>
-    </xml>
-    <token name="@LINK_SEQUENCES@">
-<![CDATA[
-#import re
-mkdir input/ &&
-#set gzip=""
-#for $num, $input in enumerate($inputs)
-    ln -s '${input}' 'input/$re.sub("[^\w\-_.]", "_", $input.element_identifier).$input.ext' &&
-    #if str($input).endswith(".gz"):
-        #set gzip="--gzip"
-    #end if
-#end for]]></token>
-    <token name="@USE_SEQUENCES@">
-<![CDATA[
-#for $num, $input in enumerate($inputs)
-    '../input/$re.sub("[^\w\-_.]", "_", $input.element_identifier).$input.ext'
-#end for]]></token>
-
-    <xml name="input_sequences_filenames">
-        <param  name="inputs" multiple="true" type="data" format="fasta,fastq,fasta.gz,fastq.gz"
-            label="Sequences in FASTA or FASTQ format"
-            help="Put in order of precedence such as longest reads first." />
-    </xml>
-    <xml name="input_sequence_filename">
-        <param  name="input_sequence_filename" type="data" format="fasta,fastq,fasta.gz,fastq.gz"
-            label="Sequence in FASTA or FASTQ format" />
-    </xml>
-    <xml name="input_countgraph_filename">
-        <param  name="input_countgraph_filename" type="data" format="oxlicg"
-            label="the k-mer countgraph to query"
-            help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer countgraph." />
-    </xml>
-    <xml name="abundance-histogram-output">
-        <data name="output_histogram_filename" format="txt"
-            label="${tool.name} on ${on_string}: k-mer abundance histogram. The
-                columns are: (1) k-mer abundance, (2) k-mer count, (3)
-                cumulative count, (4) fraction of total distinct k-mers." />
-    </xml>
-    <xml name="output_sequences" token_extension="">
-        <collection name="sequences" type="list">
-            <discover_datasets pattern="(?P&lt;name&gt;.*)\.(?P&lt;ext&gt;fast[aq](\.gz)?)\.@EXTENSION@" directory="output" />
-        </collection>
-    </xml>
-    <xml name="output_sequences_single">
-        <data name="output" format_source="input_sequence_filename"
-            label="${tool.name} on ${on_string}: processed nucleotide sequence file" />
-    </xml>
-    <xml name="input_zero">
-        <param argument="--no-zero" name="zero" type="boolean" truevalue="" falsevalue="--no-zero" checked="true"
-            help="Output zero count bins" />
-    </xml>
-    <xml name="input_bigcount">
-        <param  argument="--no-bigcount" name="bigcount" type="boolean" truevalue="" falsevalue="--no-bigcount"
-            checked="true" help="Count k-mers past 255 occurences" />
-    </xml>
-    <token name="@HELP_FOOTER@"><![CDATA[
-(from the khmer project: http://khmer.readthedocs.org/en/v2.0/ )]]></token>
-    <xml name="software-citation">
-        <citation type="doi">10.12688/f1000research.6924.1</citation>
-    </xml>
-    <xml name="diginorm-citation">
-        <citation type="bibtex">@unpublished{diginorm,
-    author = "Brown, C Titus and Howe, Adina and Zhang, Qingpeng and Pyrkosz,
-Alexis B and Brom, Timothy H",
-    title = "A Reference-Free Algorithm for Computational Normalization of
-Shotgun Sequencing Data",
-    year = "2012",
-    eprint = "arXiv:1203.4802",
-    url = "http://arxiv.org/abs/1203.4802",
-}</citation></xml>
-    <xml name="graph-citation">
-        <citation type="doi">10.1073/pnas.1121464109</citation>
-    </xml>
-    <xml name="counting-citation">
-        <citation type="doi">10.1371/journal.pone.0101271</citation>
-    </xml>
-</macros>
--- a/normalize-by-median.xml	Tue Aug 31 09:12:02 2021 +0000
+++ b/normalize-by-median.xml	Thu Oct 03 13:45:11 2024 +0000
@@ -1,10 +1,10 @@
 <tool id="khmer_normalize_by_median" name="khmer: Normalize By Median" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <description>Filter reads using digital normalization via k-mer abundances</description>
-    <expand macro="bio_tools"/>
     <macros>
         <token name="@BINARY@">normalize-by-median.py</token>
         <import>macros.xml</import>
     </macros>
+    <expand macro="bio_tools"/>
     <expand macro="requirements" />
     <expand macro="stdio" />
     <expand macro="version" />
@@ -49,18 +49,18 @@
             label="Optional k-mer countgraph"
             help="The inputs file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." />
         <param argument="--savegraph" name="save_countgraph" type="boolean" label="Save the k-mer countgraph(s) in a file" help="" />
-        <param argument="--cutoff" name="cutoff" type="integer" min="1" value="20" label="Cutoff" help="" />
+        <param argument="--cutoff" type="integer" min="1" value="20" label="Cutoff" help="" />
         <expand macro="tableinputs" />
     </inputs>
     <outputs>
-        <data name="countgraph" format="oxlicg" label="${tool.name} k-mer countgraph">
+        <data name="countgraph" format="oxlicg" label="${tool.name} on ${on_string}: k-mer countgraph">
             <filter>save_countgraph == True</filter>
         </data>
-        <data name="report" format="txt" label="${tool.name} report" />
+        <data name="report" format="csv" label="${tool.name} on ${on_string}: report" />
         <expand macro="output_sequences" extension="keep"/>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="2">
             <param name="inputs" value="test-abund-read-2.fa" ftype="fasta"/>
             <param name="type" value="specific" />
             <param name="cutoff" value="1" />
@@ -74,7 +74,7 @@
                 </element>
             </output_collection>
         </test>
-        <test>
+        <test expect_num_outputs="2">
             <param name="inputs" value="test-abund-read-2.fa.gz"  ftype="fasta.gz"/>
             <param name="type" value="specific" />
             <param name="cutoff" value="2" />
@@ -89,12 +89,13 @@
                 </element>
             </output_collection>
         </test>
-        <test>
+        <test expect_num_outputs="3">
             <param name="inputs" value="test-abund-read-paired.fa" ftype="fasta"/>
             <param name="type" value="specific" />
             <param name="cutoff" value="1" />
             <param name="ksize" value="17" />
             <param name="paired" value="true" />
+            <param name="save_countgraph" value="true"/>
             <output name="report" file="normalize-by-median.paired.report.txt" />
             <output_collection name="sequences" type="list">
                 <element name="test-abund-read-paired.fa" ftype="fasta">
@@ -104,6 +105,11 @@
                     </assert_contents>
                 </element>
             </output_collection>
+            <output name="countgraph">
+                <assert_contents>
+                    <has_size size="1k"/>
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help><![CDATA[