Mercurial > repos > iuc > khmer_normalize_by_median
changeset 11:aa02ecdaf9ef draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/khmer commit 238d0992c63de53623c4fc05eec8bd8d67001997
| author | iuc |
|---|---|
| date | Thu, 03 Oct 2024 13:45:11 +0000 |
| parents | 4e03aee23dc6 |
| children | |
| files | macros.xml macros.xml.orig normalize-by-median.xml |
| diffstat | 3 files changed, 24 insertions(+), 157 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Tue Aug 31 09:12:02 2021 +0000 +++ b/macros.xml Thu Oct 03 13:45:11 2024 +0000 @@ -1,6 +1,6 @@ <macros> <token name="@TOOL_VERSION@">3.0.0a3</token> - <token name="@VERSION_SUFFIX@">2</token> + <token name="@VERSION_SUFFIX@">3</token> <token name="@PROFILE@">20.01</token> <xml name="bio_tools"> <xrefs> @@ -34,7 +34,7 @@ #end if ]]> </token> - <token name="@THREADS@">--threads \${GALAXY_SLOTS:-4}</token> + <token name="@THREADS@">--threads "\${GALAXY_SLOTS:-4}"</token> <xml name="tableinputs"> <conditional name="parameters"> <param name="type" type="select" label="Advanced Parameters" @@ -43,7 +43,7 @@ <option value="specific">Show</option> </param> <when value="simple"> - <param argument="" name="tablesize" type="select" label="Sample Type" display="radio"> + <param argument="--max-tablesize" name="tablesize" type="select" label="Sample Type" display="radio"> <option value="1e9" selected="true">Microbial Genome</option> <option value="2e9">Animal Transcriptome</option> <option value="4e9">Small Animal Genome or Low-Diversity Metagenome</option> @@ -51,8 +51,8 @@ </param> </when> <when value="specific"> - <param argument="--ksize" name="ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" /> - <param argument="--n_tables" name="n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" /> + <param argument="--ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" /> + <param argument="--n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" /> <param argument="--max-tablesize" name="tablesize_specific" type="text" value="1000000.0" label="tablesize" help="(--max-tablesize) upper bound on the tablesize to use" /> </when> @@ -90,11 +90,13 @@ help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer countgraph." /> </xml> <xml name="abundance-histogram-output"> - <data name="output_histogram_filename" format="txt" - label="${tool.name} on ${on_string}: k-mer abundance histogram. The - columns are: (1) k-mer abundance, (2) k-mer count, (3) - cumulative count, (4) fraction of total distinct k-mers." /> + <data name="output_histogram_filename" format="csv" + label="${tool.name} on ${on_string}: k-mer abundance histogram" /> </xml> + <token name="@ABUNDANCE_HISTOGRAM_OUTPUT_HELP@"><![CDATA[ + The columns of the k-mer abundance histogram are: (1) k-mer abundance, (2) k-mer count, (3) cumulative count, (4) fraction of total distinct k-mers. +]]></token> + <xml name="output_sequences" token_extension=""> <collection name="sequences" type="list"> <discover_datasets pattern="(?P<name>.*)\.(?P<ext>fast[aq](\.gz)?)\.@EXTENSION@" directory="output" />
--- a/macros.xml.orig Tue Aug 31 09:12:02 2021 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,141 +0,0 @@ -<macros> -<<<<<<< HEAD - <token name="@TOOL_VERSION@">3.0.0a3</token> - <token name="@VERSION_SUFFIX@">2</token> - <token name="@PROFILE@">20.01</token> -======= - <token name="@WRAPPER_VERSION@">3.0.0a3</token> - <token name="@TOOL_VERSION@">+galaxy1</token> - <xml name="bio_tools"> - <xrefs> - <xref type='bio.tools'>khmer</xref> - </xrefs> - </xml> ->>>>>>> c37d72558 (add more bio.tool IDs) - <xml name="requirements"> - <requirements> - <requirement type="package" version="@TOOL_VERSION@">khmer</requirement> - <yield/> - </requirements> - </xml> - <xml name="version"> - <version_command><![CDATA[@BINARY@ --version 2>&1 | tail -n 1 | cut -d ' ' -f 2]]></version_command> - </xml> - <xml name="stdio"> - <stdio> - <exit_code range="1:" level="fatal" /> - </stdio> - </xml> - <token name="@TABLEPARAMS@"> -<![CDATA[ -#if $parameters.type == "simple" - --ksize=20 - --n_tables=4 - --max-tablesize=$parameters.tablesize -#else - --ksize=$parameters.ksize - --n_tables=$parameters.n_tables - --max-tablesize="$parameters.tablesize_specific" -#end if -]]> - </token> - <token name="@THREADS@">--threads \${GALAXY_SLOTS:-4}</token> - <xml name="tableinputs"> - <conditional name="parameters"> - <param name="type" type="select" label="Advanced Parameters" - help="ksize, n_tables, a specific tablesize" > - <option value="simple" selected="true">Hide</option> - <option value="specific">Show</option> - </param> - <when value="simple"> - <param argument="" name="tablesize" type="select" label="Sample Type" display="radio"> - <option value="1e9" selected="true">Microbial Genome</option> - <option value="2e9">Animal Transcriptome</option> - <option value="4e9">Small Animal Genome or Low-Diversity Metagenome</option> - <option value="16e9">Large Animal Genome</option> - </param> - </when> - <when value="specific"> - <param argument="--ksize" name="ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" /> - <param argument="--n_tables" name="n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" /> - <param argument="--max-tablesize" name="tablesize_specific" type="text" value="1000000.0" - label="tablesize" help="(--max-tablesize) upper bound on the tablesize to use" /> - </when> - </conditional> - </xml> - <token name="@LINK_SEQUENCES@"> -<![CDATA[ -#import re -mkdir input/ && -#set gzip="" -#for $num, $input in enumerate($inputs) - ln -s '${input}' 'input/$re.sub("[^\w\-_.]", "_", $input.element_identifier).$input.ext' && - #if str($input).endswith(".gz"): - #set gzip="--gzip" - #end if -#end for]]></token> - <token name="@USE_SEQUENCES@"> -<![CDATA[ -#for $num, $input in enumerate($inputs) - '../input/$re.sub("[^\w\-_.]", "_", $input.element_identifier).$input.ext' -#end for]]></token> - - <xml name="input_sequences_filenames"> - <param name="inputs" multiple="true" type="data" format="fasta,fastq,fasta.gz,fastq.gz" - label="Sequences in FASTA or FASTQ format" - help="Put in order of precedence such as longest reads first." /> - </xml> - <xml name="input_sequence_filename"> - <param name="input_sequence_filename" type="data" format="fasta,fastq,fasta.gz,fastq.gz" - label="Sequence in FASTA or FASTQ format" /> - </xml> - <xml name="input_countgraph_filename"> - <param name="input_countgraph_filename" type="data" format="oxlicg" - label="the k-mer countgraph to query" - help="The abundances of the k-mers in the input nucleotide sequence file will be calculated using the kmer counts in this k-mer countgraph." /> - </xml> - <xml name="abundance-histogram-output"> - <data name="output_histogram_filename" format="txt" - label="${tool.name} on ${on_string}: k-mer abundance histogram. The - columns are: (1) k-mer abundance, (2) k-mer count, (3) - cumulative count, (4) fraction of total distinct k-mers." /> - </xml> - <xml name="output_sequences" token_extension=""> - <collection name="sequences" type="list"> - <discover_datasets pattern="(?P<name>.*)\.(?P<ext>fast[aq](\.gz)?)\.@EXTENSION@" directory="output" /> - </collection> - </xml> - <xml name="output_sequences_single"> - <data name="output" format_source="input_sequence_filename" - label="${tool.name} on ${on_string}: processed nucleotide sequence file" /> - </xml> - <xml name="input_zero"> - <param argument="--no-zero" name="zero" type="boolean" truevalue="" falsevalue="--no-zero" checked="true" - help="Output zero count bins" /> - </xml> - <xml name="input_bigcount"> - <param argument="--no-bigcount" name="bigcount" type="boolean" truevalue="" falsevalue="--no-bigcount" - checked="true" help="Count k-mers past 255 occurences" /> - </xml> - <token name="@HELP_FOOTER@"><![CDATA[ -(from the khmer project: http://khmer.readthedocs.org/en/v2.0/ )]]></token> - <xml name="software-citation"> - <citation type="doi">10.12688/f1000research.6924.1</citation> - </xml> - <xml name="diginorm-citation"> - <citation type="bibtex">@unpublished{diginorm, - author = "Brown, C Titus and Howe, Adina and Zhang, Qingpeng and Pyrkosz, -Alexis B and Brom, Timothy H", - title = "A Reference-Free Algorithm for Computational Normalization of -Shotgun Sequencing Data", - year = "2012", - eprint = "arXiv:1203.4802", - url = "http://arxiv.org/abs/1203.4802", -}</citation></xml> - <xml name="graph-citation"> - <citation type="doi">10.1073/pnas.1121464109</citation> - </xml> - <xml name="counting-citation"> - <citation type="doi">10.1371/journal.pone.0101271</citation> - </xml> -</macros>
--- a/normalize-by-median.xml Tue Aug 31 09:12:02 2021 +0000 +++ b/normalize-by-median.xml Thu Oct 03 13:45:11 2024 +0000 @@ -1,10 +1,10 @@ <tool id="khmer_normalize_by_median" name="khmer: Normalize By Median" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> <description>Filter reads using digital normalization via k-mer abundances</description> - <expand macro="bio_tools"/> <macros> <token name="@BINARY@">normalize-by-median.py</token> <import>macros.xml</import> </macros> + <expand macro="bio_tools"/> <expand macro="requirements" /> <expand macro="stdio" /> <expand macro="version" /> @@ -49,18 +49,18 @@ label="Optional k-mer countgraph" help="The inputs file(s) will be processed using the kmer counts in the specified k-mer countgraph file as a starting point." /> <param argument="--savegraph" name="save_countgraph" type="boolean" label="Save the k-mer countgraph(s) in a file" help="" /> - <param argument="--cutoff" name="cutoff" type="integer" min="1" value="20" label="Cutoff" help="" /> + <param argument="--cutoff" type="integer" min="1" value="20" label="Cutoff" help="" /> <expand macro="tableinputs" /> </inputs> <outputs> - <data name="countgraph" format="oxlicg" label="${tool.name} k-mer countgraph"> + <data name="countgraph" format="oxlicg" label="${tool.name} on ${on_string}: k-mer countgraph"> <filter>save_countgraph == True</filter> </data> - <data name="report" format="txt" label="${tool.name} report" /> + <data name="report" format="csv" label="${tool.name} on ${on_string}: report" /> <expand macro="output_sequences" extension="keep"/> </outputs> <tests> - <test> + <test expect_num_outputs="2"> <param name="inputs" value="test-abund-read-2.fa" ftype="fasta"/> <param name="type" value="specific" /> <param name="cutoff" value="1" /> @@ -74,7 +74,7 @@ </element> </output_collection> </test> - <test> + <test expect_num_outputs="2"> <param name="inputs" value="test-abund-read-2.fa.gz" ftype="fasta.gz"/> <param name="type" value="specific" /> <param name="cutoff" value="2" /> @@ -89,12 +89,13 @@ </element> </output_collection> </test> - <test> + <test expect_num_outputs="3"> <param name="inputs" value="test-abund-read-paired.fa" ftype="fasta"/> <param name="type" value="specific" /> <param name="cutoff" value="1" /> <param name="ksize" value="17" /> <param name="paired" value="true" /> + <param name="save_countgraph" value="true"/> <output name="report" file="normalize-by-median.paired.report.txt" /> <output_collection name="sequences" type="list"> <element name="test-abund-read-paired.fa" ftype="fasta"> @@ -104,6 +105,11 @@ </assert_contents> </element> </output_collection> + <output name="countgraph"> + <assert_contents> + <has_size size="1k"/> + </assert_contents> + </output> </test> </tests> <help><