Mercurial > repos > dfornika > mash
changeset 17:34869670b2d0 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mash commit e7ac80b7bf62e50eb537b5adbe0a5ece8c4e77df-dirty"
author | dfornika |
---|---|
date | Mon, 24 Feb 2020 20:43:35 +0000 |
parents | 13516965dc59 |
children | ce64e11834dd |
files | macros.xml mash_screen.xml mash_sketch.xml test-data/ERR024951_seqtk_sample_1000_1.sketch.msh test-data/test_assembly.sketch.msh |
diffstat | 5 files changed, 50 insertions(+), 136 deletions(-) |
line wrap: on
line diff
--- a/macros.xml Thu Feb 13 22:10:20 2020 +0000 +++ b/macros.xml Mon Feb 24 20:43:35 2020 +0000 @@ -3,4 +3,21 @@ <token name="@INTYPES@"> fasta,fasta.gz,fastq,fastq.gz,fastqsanger,fastqsanger.gz </token> + + <xml name="citations"> + <citations> + <citation type="doi">10.1186/s13059-016-0997-x</citation> + </citations> + </xml> + + <xml name="requirements"> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">mash</requirement> + </requirements> + </xml> + + <xml name="version_command"> + <version_command>mash --version</version_command> + </xml> + </macros>
--- a/mash_screen.xml Thu Feb 13 22:10:20 2020 +0000 +++ b/mash_screen.xml Mon Feb 24 20:43:35 2020 +0000 @@ -1,14 +1,16 @@ -<tool id="mash_screen" name="mash screen" version="@TOOL_VERSION@+galaxy1" profile="19.01"> - <description>determines how well query sequences are contained within a pool of sequences.</description> +<tool id="mash_screen" name="mash screen" version="@TOOL_VERSION@+galaxy2" profile="19.01"> + <description>determines how well query sequences are contained within a pool of sequences</description> <macros> <import>macros.xml</import> </macros> - <requirements> - <requirement type="package" version="@TOOL_VERSION@">mash</requirement> - </requirements> - <version_command>mash --version</version_command> + <expand macro="requirements" /> + <expand macro="version_command" /> <command detect_errors="exit_code"><![CDATA[ - ln -s '$queries' queries.msh && + #if str( $queries_input_source.queries_input_source_selector ) == "tool_data_table": + ln -s '$queries_input_source.queries.fields.path' queries.msh && + #elif str( $queries_input_source.queries_input_source_selector ) == 'history': + ln -s '$queries_input_source.queries' queries.msh && + #end if mash screen $winner_takes_all -i $minimum_identity_to_report @@ -43,28 +45,45 @@ <param name="pool" format="@INTYPES@" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/> </when> </conditional> - <param name="queries" type="data" format="msh" /> - <param type="boolean" name="winner_takes_all" argument="-w" truevalue="-w" falsevalue=""/> - <param type="float" name="minimum_identity_to_report" argument="-i" value="0." min="-1." max="1." /> - <param type="float" name="maximum_p_value_to_report" argument="-v" value="1." min="0." 
max="1."/> + <conditional name="queries_input_source"> + <param name="queries_input_source_selector" type="select" label="Select queries from your history or use one from a tool data table?" help=""> + <option value="tool_data_table">Queries from tool data table</option> + <option selected="True" value="history">Queries from history</option> + </param> + <when value="tool_data_table"> + <param name="queries" type="select" label="Queries (Mash Sketch)"> + <options from_data_table="mash_sketches"/> + </param> + </when> + <when value="history"> + <param name="queries" type="data" format="msh" /> + </when> + </conditional> + <param name="winner_takes_all" argument="-w" type="boolean" checked="true" truevalue="-w" falsevalue="" label="'Winner takes all' to remove redundancy in the result" + help="If this option is not enabled, every matching strain from the same species of the reference database is reported in the result."/> + <param type="float" name="minimum_identity_to_report" argument="-i" value="0." min="-1." max="1." label="Minimum identity to report" /> + <param type="float" name="maximum_p_value_to_report" argument="-v" value="1." min="0." max="1." 
label="Maximum p-value to report" /> </inputs> <outputs> <data name="output" format="tabular" /> </outputs> <tests> <test> + <param name="queries_input_source_selector" value="history"/> <param name="queries" value="NZ_MYON01000010.1.msh"/> <param name="pool_input_selector" value="single"/> <param name="pool" value="ERR024951_seqtk_sample_1000_1.fastq"/> <output name="output" file="mash_screen_NZ_MYON01000010.1_ERR024951_seqtk_sample_1000_1.tsv"/> </test> <test> + <param name="queries_input_source_selector" value="history"/> <param name="queries" value="NZ_MYON01000010.1.msh"/> <param name="pool_input_selector" value="single"/> <param name="pool" value="ERR024951_seqtk_sample_1000_2.fastq"/> <output name="output" file="mash_screen_NZ_MYON01000010.1_ERR024951_seqtk_sample_1000_2.tsv"/> </test> <test> + <param name="queries_input_source_selector" value="history"/> <param name="queries" value="NZ_MYON01000010.1.msh"/> <param name="pool_input_selector" value="paired"/> <param name="pool_1" value="ERR024951_seqtk_sample_1000_1.fastq"/> @@ -73,7 +92,8 @@ </test> </tests> <help><![CDATA[ -Description: + +**What it does** Determine how well query sequences are contained within a pool of sequences. The queries must be formatted as a single Mash sketch file (.msh), created @@ -85,18 +105,5 @@ query-comment], where median-multiplicity is computed for shared hashes, based on the number of observations of those hashes within the pool. ]]></help> - <citations> - <citation type="bibtex"> -@article{ondov2016mash, - title={Mash: fast genome and metagenome distance estimation using MinHash}, - author={Ondov, Brian D and Treangen, Todd J and Melsted, P{\'a}ll and Mallonee, Adam B and Bergman, Nicholas H and Koren, Sergey and Phillippy, Adam M}, - journal={Genome biology}, - volume={17}, - number={1}, - pages={132}, - year={2016}, - publisher={BioMed Central} - } - </citation> - </citations> + <expand macro="citations"/> </tool>
--- a/mash_sketch.xml Thu Feb 13 22:10:20 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,110 +0,0 @@ -<tool id="mash_sketch" name="mash sketch" version="@TOOL_VERSION@+galaxy0" profile="19.01"> - <description> - Create a reduced representation of a sequence or set of sequences, based on min-hashes. - </description> - <macros> - <import>macros.xml</import> - </macros> - <requirements> - <requirement type="package" version="@TOOL_VERSION@">mash</requirement> - </requirements> - <version_command>mash --version</version_command> - <command detect_errors="exit_code"><![CDATA[ - mash sketch - -s '${sketch_size}' - -k '${kmer_size}' - -m '${minimum_kmer_copies}' - #if str ( $reads_assembly.reads_assembly_selector ) == "reads" - -r - #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired" - '$reads_input.reads_1' '$reads_input.reads_2' - #end if - #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection" - '$reads_assembly.reads_input.reads.forward' '$reads_assembly.reads_input.reads.reverse' - #end if - #if str( $reads_assembly.reads_input.reads_input_selector ) == "single" - '$reads_assembly.reads_input.reads' - #end if - #elif str ( $reads_assembly.reads_assembly_selector ) == "assembly" - '${assembly}' - ${reads_assembly.individual_sequences} - #end if - -o 'sketch' - ]]></command> - <inputs> - <conditional name="reads_assembly"> - <param name="reads_assembly_selector" type="select" label="Input: Reads or Assemblies"> - <option selected="True" value="reads">Reads</option> - <option value="assembly">Assembly</option> - </param> - <when value="reads"> - <conditional name="reads_input"> - <param name="reads_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data"> - <option value="paired">Paired</option> - <option value="single">Single</option> - <option value="paired_collection">Paired Collection</option> - </param> - <when value="paired"> - <param 
name="reads_1" type="data" format="@INTYPES@" label="Select first set of reads" help="Specify dataset with forward reads"/> - <param name="reads_2" type="data" format="@INTYPES@" label="Select second set of reads" help="Specify dataset with reverse reads"/> - </when> - <when value="single"> - <param name="reads" type="data" format="@INTYPES@" label="Select fastq dataset" help="Specify dataset with single reads"/> - </when> - <when value="paired_collection"> - <param name="reads" format="@INTYPES@" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/> - </when> - </conditional> - </when> - <when value="assembly"> - <param name="assembly" type="data" format="fasta,fasta.gz" label="Assembly"/> - <param type="boolean" name="individual_sequences" truevalue="-i" falsevalue="" label="Sketch individual Sequences"/> - </when> - </conditional> - <param type="integer" name="sketch_size" argument="-s" value="1000" min="10" max="1000000" /> - <param type="integer" name="kmer_size" argument="-k" value="21" min="1" max="32"/> - <param type="integer" name="minimum_kmer_copies" argument="-m" value="1" min="1" max="1000"/> - </inputs> - <outputs> - <data name="output" format="msh" from_work_dir="sketch.msh"/> - </outputs> - <tests> - <test> - <param name="reads_assembly_selector" value="reads" /> - <param name="reads_input_selector" value="single"/> - <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/> - <assert_stderr has_text="Estimated genome size:"/> - </test> - <test> - <param name="reads_assembly_selector" value="assembly" /> - <param name="assembly" value="test_assembly.fasta"/> - <assert_stderr has_text="Sketching"/> - </test> - </tests> - <help><![CDATA[ -Description: - - Create a sketch file, which is a reduced representation of a sequence or set - of sequences (based on min-hashes) that can be used for fast distance - estimations. 
Inputs can be fasta or fastq files (gzipped or not), and "-" can - be given to read from standard input. Input files can also be files of file - names (see -l). For output, one sketch file will be generated, but it can have - multiple sketches within it, divided by sequences or files (see -i). By - default, the output file name will be the first input file with a '.msh' - extension, or 'stdin.msh' if standard input is used (see -o). - ]]></help> - <citations> - <citation type="bibtex"> - @article{ondov2016mash, - title={Mash: fast genome and metagenome distance estimation using MinHash}, - author={Ondov, Brian D and Treangen, Todd J and Melsted, P{\'a}ll and Mallonee, Adam B and Bergman, Nicholas H and Koren, Sergey and Phillippy, Adam M}, - journal={Genome biology}, - volume={17}, - number={1}, - pages={132}, - year={2016}, - publisher={BioMed Central} - } - </citation> - </citations> -</tool>