Mercurial > repos > iuc > khmer_partition

--- a/do-partition.xml	Tue Mar 23 21:26:38 2021 +0000
+++ b/do-partition.xml	Tue Aug 31 09:12:27 2021 +0000
@@ -1,5 +1,6 @@
 <tool id="khmer_partition" name="khmer: Sequence partition all-in-one" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <description>Load, partition, and annotate sequences</description>
+    <expand macro="bio_tools"/>
     <macros>
         <token name="@BINARY@">do-partition.py</token>
         <import>macros.xml</import>
--- a/macros.xml	Tue Mar 23 21:26:38 2021 +0000
+++ b/macros.xml	Tue Aug 31 09:12:27 2021 +0000
@@ -2,6 +2,11 @@
     <token name="@TOOL_VERSION@">3.0.0a3</token>
     <token name="@VERSION_SUFFIX@">2</token>
     <token name="@PROFILE@">20.01</token>
+    <xml name="bio_tools">
+        <xrefs>
+            <xref type="bio.tools">khmer</xref>
+        </xrefs>
+    </xml>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">khmer</requirement>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml.orig	Tue Aug 31 09:12:27 2021 +0000
@@ -0,0 +1,141 @@
+<macros>
+<<<<<<< HEAD
+    <token name="@TOOL_VERSION@">3.0.0a3</token>
+    <token name="@VERSION_SUFFIX@">2</token>
+    <token name="@PROFILE@">20.01</token>
+=======
+    <token name="@WRAPPER_VERSION@">3.0.0a3</token>
+    <token name="@TOOL_VERSION@">+galaxy1</token>
+    <xml name="bio_tools">
+        <xrefs>
+            <xref type='bio.tools'>khmer</xref>
+        </xrefs>
+    </xml>
+>>>>>>> c37d72558 (add more bio.tool IDs)
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">khmer</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="version">
+        <version_command><![CDATA[@BINARY@ --version 2>&1 | tail -n 1 | cut -d ' ' -f 2]]></version_command>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" level="fatal" />
+        </stdio>
+    </xml>
+    <token name="@TABLEPARAMS@">
+<![CDATA[
+#if $parameters.type == "simple"
+  --ksize=20
+  --n_tables=4
+  --max-tablesize=$parameters.tablesize
+#else
+  --ksize=$parameters.ksize
+  --n_tables=$parameters.n_tables
+  --max-tablesize="$parameters.tablesize_specific"
+#end if
+]]>
+    </token>
+    <token name="@THREADS@">--threads \${GALAXY_SLOTS:-4}</token>
+    <xml name="tableinputs">
+        <conditional name="parameters">
+            <param name="type" type="select" label="Advanced Parameters"
+                help="ksize, n_tables, a specific tablesize" >
+                <option value="simple" selected="true">Hide</option>
+                <option value="specific">Show</option>
+            </param>
+            <when value="simple">
+                <param argument="" name="tablesize" type="select" label="Sample Type" display="radio">
+                    <option value="1e9" selected="true">Microbial Genome</option>
+                    <option value="2e9">Animal Transcriptome</option>
+                    <option value="4e9">Small Animal Genome or Low-Diversity Metagenome</option>
+                    <option value="16e9">Large Animal Genome</option>
+                </param>
+            </when>
+            <when value="specific">
+                <param argument="--ksize" name="ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" />
+                <param argument="--n_tables" name="n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" />
+                <param argument="--max-tablesize" name="tablesize_specific" type="text" value="1000000.0"
+                    label="tablesize" help="(--max-tablesize) upper bound on the tablesize to use" />
+            </when>
+        </conditional>
+    </xml>
+    <token name="@LINK_SEQUENCES@">
+<![CDATA[
+#import re
+mkdir input/ &&
+#set gzip=""
+#for $num, $input in enumerate($inputs)
+    ln -s '${input}' 'input/$re.sub("[^\w\-_.]", "_", $input.element_identifier).$input.ext' &&
+    #if str($input).endswith(".gz"):
+        #set gzip="--gzip"
+    #end if
+#end for]]></token>
+    <token name="@USE_SEQUENCES@">
+<![CDATA[
+#for $num, $input in enumerate($inputs)
+    '../input/$re.sub("[^\w\-_.]", "_", $input.element_identifier).$input.ext'
+#end for]]></token>
+
+    <xml name="input_sequences_filenames">
+        <param  name="inputs" multiple="true" type="data" format="fasta,fastq,fasta.gz,fastq.gz"
+            label="Sequences in FASTA or FASTQ format"
+            help="Put in order of precedence such as longest reads first." />
+    </xml>
+    <xml name="input_sequence_filename">
+        <param  name="input_sequence_filename" type="data" format="fasta,fastq,fasta.gz,fastq.gz"
+            label="Sequence in FASTA or FASTQ format" />
+    </xml>
+    <xml name="input_countgraph_filename">
+        <param  name="input_countgraph_filename" type="data" format="oxlicg"
+            label="the k-mer countgraph to query"
+            help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer countgraph." />
+    </xml>
+    <xml name="abundance-histogram-output">
+        <data name="output_histogram_filename" format="txt"
+            label="${tool.name} on ${on_string}: k-mer abundance histogram. The
+                columns are: (1) k-mer abundance, (2) k-mer count, (3)
+                cumulative count, (4) fraction of total distinct k-mers." />
+    </xml>
+    <xml name="output_sequences" token_extension="">
+        <collection name="sequences" type="list">
+            <discover_datasets pattern="(?P&lt;name&gt;.*)\.(?P&lt;ext&gt;fast[aq](\.gz)?)\.@EXTENSION@" directory="output" />
+        </collection>
+    </xml>
+    <xml name="output_sequences_single">
+        <data name="output" format_source="input_sequence_filename"
+            label="${tool.name} on ${on_string}: processed nucleotide sequence file" />
+    </xml>
+    <xml name="input_zero">
+        <param argument="--no-zero" name="zero" type="boolean" truevalue="" falsevalue="--no-zero" checked="true"
+            help="Output zero count bins" />
+    </xml>
+    <xml name="input_bigcount">
+        <param  argument="--no-bigcount" name="bigcount" type="boolean" truevalue="" falsevalue="--no-bigcount"
+            checked="true" help="Count k-mers past 255 occurences" />
+    </xml>
+    <token name="@HELP_FOOTER@"><![CDATA[
+(from the khmer project: http://khmer.readthedocs.org/en/v2.0/ )]]></token>
+    <xml name="software-citation">
+        <citation type="doi">10.12688/f1000research.6924.1</citation>
+    </xml>
+    <xml name="diginorm-citation">
+        <citation type="bibtex">@unpublished{diginorm,
+    author = "Brown, C Titus and Howe, Adina and Zhang, Qingpeng and Pyrkosz,
+Alexis B and Brom, Timothy H",
+    title = "A Reference-Free Algorithm for Computational Normalization of
+Shotgun Sequencing Data",
+    year = "2012",
+    eprint = "arXiv:1203.4802",
+    url = "http://arxiv.org/abs/1203.4802",
+}</citation></xml>
+    <xml name="graph-citation">
+        <citation type="doi">10.1073/pnas.1121464109</citation>
+    </xml>
+    <xml name="counting-citation">
+        <citation type="doi">10.1371/journal.pone.0101271</citation>
+    </xml>
+</macros>