changeset 17:34869670b2d0 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mash commit e7ac80b7bf62e50eb537b5adbe0a5ece8c4e77df-dirty"
author dfornika
date Mon, 24 Feb 2020 20:43:35 +0000
parents 13516965dc59
children ce64e11834dd
files macros.xml mash_screen.xml mash_sketch.xml test-data/ERR024951_seqtk_sample_1000_1.sketch.msh test-data/test_assembly.sketch.msh
diffstat 5 files changed, 50 insertions(+), 136 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Thu Feb 13 22:10:20 2020 +0000
+++ b/macros.xml	Mon Feb 24 20:43:35 2020 +0000
@@ -3,4 +3,21 @@
     <token name="@INTYPES@">
         fasta,fasta.gz,fastq,fastq.gz,fastqsanger,fastqsanger.gz
     </token>
+    
+    <xml name="citations"> <!-- Shared macro: tool wrappers pull this in with <expand macro="citations"/>. -->
+        <citations>
+            <citation type="doi">10.1186/s13059-016-0997-x</citation>
+        </citations>
+    </xml>
+
+    <xml name="requirements"> <!-- Shared macro: declares the mash package dependency; the version is supplied by the TOOL_VERSION token. -->
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">mash</requirement>
+        </requirements>
+    </xml>
+
+    <xml name="version_command"> <!-- Shared macro: reports the tool version by invoking the mash binary with its version flag. -->
+        <version_command>mash --version</version_command>
+    </xml>
+
 </macros>
--- a/mash_screen.xml	Thu Feb 13 22:10:20 2020 +0000
+++ b/mash_screen.xml	Mon Feb 24 20:43:35 2020 +0000
@@ -1,14 +1,16 @@
-<tool id="mash_screen" name="mash screen" version="@TOOL_VERSION@+galaxy1" profile="19.01">
-    <description>determines how well query sequences are contained within a pool of sequences.</description>
+<tool id="mash_screen" name="mash screen" version="@TOOL_VERSION@+galaxy2" profile="19.01">
+    <description>determines how well query sequences are contained within a pool of sequences</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <requirements>
-        <requirement type="package" version="@TOOL_VERSION@">mash</requirement>
-    </requirements>
-    <version_command>mash --version</version_command>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
     <command detect_errors="exit_code"><![CDATA[
-        ln -s '$queries' queries.msh &&
+        #if str( $queries_input_source.queries_input_source_selector ) == "tool_data_table":
+          ln -s '$queries_input_source.queries.fields.path' queries.msh &&
+        #elif str( $queries_input_source.queries_input_source_selector ) == 'history':
+          ln -s '$queries_input_source.queries' queries.msh &&
+        #end if
         mash screen
              $winner_takes_all
              -i $minimum_identity_to_report
@@ -43,28 +45,45 @@
                 <param name="pool" format="@INTYPES@" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
             </when>
         </conditional>
-        <param name="queries" type="data" format="msh" />
-        <param type="boolean" name="winner_takes_all" argument="-w" truevalue="-w" falsevalue=""/>
-        <param type="float" name="minimum_identity_to_report" argument="-i" value="0." min="-1." max="1." />
-        <param type="float" name="maximum_p_value_to_report" argument="-v" value="1." min="0." max="1."/>
+        <conditional name="queries_input_source"> <!-- Chooses where the query sketch comes from; the command block branches on the selector value. -->
+            <param name="queries_input_source_selector" type="select" label="Select queries from your history or use one from a tool data table?" help="The queries must be a single Mash sketch file (.msh).">
+                <option value="tool_data_table">Queries from tool data table</option>
+                <option selected="true" value="history">Queries from history</option>
+            </param>
+            <when value="tool_data_table"> <!-- Options are read from the mash_sketches data table; the command links the path field of the chosen entry. -->
+                <param name="queries" type="select" label="Queries (Mash Sketch)">
+                    <options from_data_table="mash_sketches"/>
+                </param>
+            </when>
+            <when value="history"> <!-- A msh-format dataset picked from the history; labelled to match the data-table branch. -->
+                <param name="queries" type="data" format="msh" label="Queries (Mash Sketch)" />
+            </when>
+        </conditional>
+        <param name="winner_takes_all" argument="-w" type="boolean" truevalue="-w" falsevalue="" checked="true"
+            label="'Winner takes all' to remove redundancy in the result" help="If this option is not enabled, every matching strain from the same species of the reference database is reported in the result."/> <!-- Checked by default; contributes -w to the command line when enabled and nothing otherwise. -->
+        <param name="minimum_identity_to_report" argument="-i" type="float" min="-1." max="1." value="0." label="Minimum identity to report"/> <!-- Passed to mash screen after the -i flag. -->
+        <param name="maximum_p_value_to_report" argument="-v" type="float" min="0." max="1." value="1." label="Maximum p-value to report"/> <!-- Declared for the -v argument; bounded to the range 0 to 1. -->
     </inputs>
     <outputs>
         <data name="output" format="tabular" />
     </outputs>
     <tests>
         <test>
+            <param name="queries_input_source_selector" value="history"/>
             <param name="queries" value="NZ_MYON01000010.1.msh"/>
             <param name="pool_input_selector" value="single"/>
             <param name="pool" value="ERR024951_seqtk_sample_1000_1.fastq"/>
             <output name="output" file="mash_screen_NZ_MYON01000010.1_ERR024951_seqtk_sample_1000_1.tsv"/>
         </test>
         <test>
+            <param name="queries_input_source_selector" value="history"/>
             <param name="queries" value="NZ_MYON01000010.1.msh"/>
             <param name="pool_input_selector" value="single"/>
             <param name="pool" value="ERR024951_seqtk_sample_1000_2.fastq"/>
             <output name="output" file="mash_screen_NZ_MYON01000010.1_ERR024951_seqtk_sample_1000_2.tsv"/>
         </test>
         <test>
+            <param name="queries_input_source_selector" value="history"/>
             <param name="queries" value="NZ_MYON01000010.1.msh"/>
             <param name="pool_input_selector" value="paired"/>
             <param name="pool_1" value="ERR024951_seqtk_sample_1000_1.fastq"/>
@@ -73,7 +92,8 @@
         </test>
     </tests>
     <help><![CDATA[
-Description:
+
+**What it does**
 
   Determine how well query sequences are contained within a pool of sequences.
   The queries must be formatted as a single Mash sketch file (.msh), created
@@ -85,18 +105,5 @@
   query-comment], where median-multiplicity is computed for shared hashes, based
   on the number of observations of those hashes within the pool.
   ]]></help>
-    <citations>
-        <citation type="bibtex">
-@article{ondov2016mash,
-  title={Mash: fast genome and metagenome distance estimation using MinHash},
-  author={Ondov, Brian D and Treangen, Todd J and Melsted, P{\'a}ll and Mallonee, Adam B and Bergman, Nicholas H and Koren, Sergey and Phillippy, Adam M},
-  journal={Genome biology},
-  volume={17},
-  number={1},
-  pages={132},
-  year={2016},
-  publisher={BioMed Central}
-  }
-        </citation>
-    </citations>
+    <expand macro="citations"/>
 </tool>
--- a/mash_sketch.xml	Thu Feb 13 22:10:20 2020 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,110 +0,0 @@
-<tool id="mash_sketch" name="mash sketch" version="@TOOL_VERSION@+galaxy0" profile="19.01">
-    <description>
-        Create a reduced representation of a sequence or set of sequences, based on min-hashes.
-    </description>
-    <macros>
-        <import>macros.xml</import>
-    </macros>
-    <requirements>
-        <requirement type="package" version="@TOOL_VERSION@">mash</requirement>
-    </requirements>
-    <version_command>mash --version</version_command>
-    <command detect_errors="exit_code"><![CDATA[
-        mash sketch
-            -s '${sketch_size}'
-            -k '${kmer_size}'
-            -m '${minimum_kmer_copies}'
-            #if str ( $reads_assembly.reads_assembly_selector ) == "reads"
-              -r
-              #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired"
-                '$reads_input.reads_1' '$reads_input.reads_2'
-              #end if
-              #if str( $reads_assembly.reads_input.reads_input_selector ) == "paired_collection"
-                '$reads_assembly.reads_input.reads.forward' '$reads_assembly.reads_input.reads.reverse'
-              #end if
-              #if str( $reads_assembly.reads_input.reads_input_selector ) == "single"
-                '$reads_assembly.reads_input.reads'
-              #end if
-            #elif str ( $reads_assembly.reads_assembly_selector ) == "assembly"
-              '${assembly}'
-              ${reads_assembly.individual_sequences}
-            #end if
-            -o 'sketch'
-    ]]></command>
-    <inputs>
-        <conditional name="reads_assembly">
-            <param name="reads_assembly_selector" type="select" label="Input: Reads or Assemblies">
-                <option selected="True" value="reads">Reads</option>
-                <option value="assembly">Assembly</option>
-            </param>
-            <when value="reads">
-                <conditional name="reads_input">
-                    <param name="reads_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
-                        <option value="paired">Paired</option>
-                        <option value="single">Single</option>
-                        <option value="paired_collection">Paired Collection</option>
-                    </param>
-                    <when value="paired">
-                        <param name="reads_1" type="data" format="@INTYPES@" label="Select first set of reads" help="Specify dataset with forward reads"/>
-                        <param name="reads_2" type="data" format="@INTYPES@" label="Select second set of reads" help="Specify dataset with reverse reads"/>
-                    </when>
-                    <when value="single">
-                        <param name="reads" type="data" format="@INTYPES@" label="Select fastq dataset" help="Specify dataset with single reads"/>
-                    </when>
-                    <when value="paired_collection">
-                        <param name="reads" format="@INTYPES@" type="data_collection" collection_type="paired" label="Select a paired collection" help="See help section for an explanation of dataset collections"/>
-                    </when>
-                </conditional>
-            </when>
-            <when value="assembly">
-                <param name="assembly" type="data" format="fasta,fasta.gz" label="Assembly"/>
-                <param type="boolean" name="individual_sequences" truevalue="-i" falsevalue="" label="Sketch individual Sequences"/> 
-            </when>
-        </conditional>
-        <param type="integer" name="sketch_size" argument="-s" value="1000" min="10" max="1000000" />
-        <param type="integer" name="kmer_size" argument="-k" value="21" min="1" max="32"/>
-        <param type="integer" name="minimum_kmer_copies" argument="-m" value="1" min="1" max="1000"/>
-    </inputs>
-    <outputs>
-        <data name="output" format="msh" from_work_dir="sketch.msh"/>
-    </outputs>
-    <tests>
-        <test>
-            <param name="reads_assembly_selector" value="reads" />
-            <param name="reads_input_selector" value="single"/>
-            <param name="reads" value="ERR024951_seqtk_sample_1000_1.fastq"/>
-            <assert_stderr has_text="Estimated genome size:"/>
-        </test>
-        <test>
-            <param name="reads_assembly_selector" value="assembly" />
-            <param name="assembly" value="test_assembly.fasta"/>
-            <assert_stderr has_text="Sketching"/>
-        </test>
-    </tests>
-    <help><![CDATA[
-Description:
-
-  Create a sketch file, which is a reduced representation of a sequence or set
-  of sequences (based on min-hashes) that can be used for fast distance
-  estimations. Inputs can be fasta or fastq files (gzipped or not), and "-" can
-  be given to read from standard input. Input files can also be files of file
-  names (see -l). For output, one sketch file will be generated, but it can have
-  multiple sketches within it, divided by sequences or files (see -i). By
-  default, the output file name will be the first input file with a '.msh'
-  extension, or 'stdin.msh' if standard input is used (see -o).
-  ]]></help>
-    <citations>
-        <citation type="bibtex">
-            @article{ondov2016mash,
-                title={Mash: fast genome and metagenome distance estimation using MinHash},
-                author={Ondov, Brian D and Treangen, Todd J and Melsted, P{\'a}ll and Mallonee, Adam B and Bergman, Nicholas H and Koren, Sergey and Phillippy, Adam M},
-                journal={Genome biology},
-                volume={17},
-                number={1},
-                pages={132},
-                year={2016},
-                publisher={BioMed Central}
-            }
-        </citation>
-    </citations>
-</tool>
Binary file test-data/ERR024951_seqtk_sample_1000_1.sketch.msh has changed
Binary file test-data/test_assembly.sketch.msh has changed