Mercurial > repos > dfornika > snippy

--- a/macros.xml	Fri Jun 21 14:38:05 2019 -0400
+++ b/macros.xml	Sat Jan 25 00:00:54 2020 +0000
@@ -2,6 +2,7 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@VERSION@">snippy</requirement>
+            <requirement type="package" version="1.32">tar</requirement>
             <yield />
         </requirements>
     </xml>
@@ -10,7 +11,53 @@
         <version_command><![CDATA[snippy --version]]></version_command>
     </xml>

-    <token name="@VERSION@">4.3.6</token>
+    <token name="@REFERENCE_SOURCE_FILE@">
+        <![CDATA[## Symlink the selected reference into the working directory as ref.fna or ref.gbk.
+        #if $reference_source.reference_source_selector == 'cached'
+            ln -sf '$reference_source.ref_file.fields.path' 'ref.fna' &&
+        #elif $reference_source.reference_source_selector == 'history'
+            #if $reference_source.ref_file.is_of_type("genbank")
+                ln -sf '$reference_source.ref_file' 'ref.gbk' &&
+            #elif $reference_source.ref_file.is_of_type("fasta")
+                ln -sf '$reference_source.ref_file' 'ref.fna' &&
+            #end if
+        #end if]]>
+    </token>
+
+    <token name="@REFERENCE_COMMAND@">
+        <![CDATA[## Emit the --ref option naming the local reference file (ref.fna or ref.gbk).
+        #if $reference_source.reference_source_selector == 'cached'
+            --ref 'ref.fna'
+        #elif $reference_source.reference_source_selector == 'history'
+            #if $reference_source.ref_file.is_of_type("genbank")
+                --ref 'ref.gbk'
+            #elif $reference_source.ref_file.is_of_type("fasta")
+                --ref 'ref.fna'
+            #end if
+        #end if
+        ]]>
+    </token>
+
+    <xml name="reference_selector"> <!-- Shared input block: choose the reference from a built-in data table or from the history -->
+        <conditional name="reference_source">
+            <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below. If you would like to perform self-mapping select `history` here, then choose your input file as reference.">
+                <option value="cached">Use a built-in genome index</option>
+                <option value="history">Use a genome from history and build index</option>
+            </param>
+            <when value="cached"> <!-- ref_file is an entry of the all_fasta data table; the command templates read its path field -->
+                <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
+                    <options from_data_table="all_fasta">
+                        <validator type="no_options" message="No reference genomes are available" />
+                    </options>
+                </param>
+            </when>
+            <when value="history"> <!-- ref_file is a history dataset in FASTA or GenBank format -->
+                <param name="ref_file" type="data" format="fasta,genbank" label="Use the following dataset as the reference sequence" help="You can upload a FASTA or GenBank sequence to the history and use it as reference" />
+            </when>
+        </conditional>
+    </xml>
+
+    <token name="@VERSION@">4.4.5</token>

     <xml name="citations">
       <citations>
--- a/snippy-core.xml	Fri Jun 21 14:38:05 2019 -0400
+++ b/snippy-core.xml	Sat Jan 25 00:00:54 2020 +0000
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="utf-8"?>
-<tool id="snippy_core" name="snippy-core" version="@VERSION@+galaxy3">
+<tool id="snippy_core" name="snippy-core" version="@VERSION@+galaxy1">
     <description>
         Combine multiple Snippy outputs into a core SNP alignment
     </description>
@@ -8,17 +8,20 @@
     </macros>
     <expand macro="requirements" />
     <command detect_errors="exit_code"><![CDATA[
+        @REFERENCE_SOURCE_FILE@
         #for $indir in $indirs
-            tar -xf '$indir' &&
+            #set $sample_name = os.path.splitext(os.path.basename(str($indir.name)))[0]
+            mkdir '$sample_name' && tar -xf '$indir' -C '$sample_name' --strip-components=1 &&
         #end for
+        #set snippy_dirs = " ".join(["'{0}'".format(os.path.splitext(os.path.basename(str($indir.name)))[0]) for $indir in $indirs])
         snippy-core
-            --ref '$ref'
-            `ls -1 -I "*.dat" -I "*.log" | tr '\n' ' '`
+            @REFERENCE_COMMAND@
+            ${snippy_dirs}
     ]]></command>

     <inputs>
-        <param name="indirs" type="data" multiple="true" format="tar" label="Snippy input zipped dirs" help="Select all the snippy inputs for alignment" />
-        <param name="ref" type="data" format="fasta,genbank" label="Reference File (either in fasta or genbank format)" help="Fasta or Genbank file to use as the reference" />
+        <param name="indirs" type="data" multiple="true" format="zip" label="Snippy input zipped dirs" help="Select all the snippy inputs for alignment" />
+        <expand macro="reference_selector" />
         <param name="outputs" type="select" multiple="true" display="checkboxes" label="Output selection">
             <option value="outaln" selected="True">A core SNP alignment in the fasta format</option>
             <option value="outfull" selected="False">A whole genome SNP alignment (includes invariant sites)</option>
@@ -38,7 +41,7 @@
         <data format="tabular" name="alignment_table" label="${tool.name} on ${on_string} core alignment table" from_work_dir="core.tab">
             <filter>outputs and 'outtab' in outputs</filter>
         </data>
-        <data format="tabular" name="alignment_summary" label="${tool.name} on ${on_string} core alignment summary" from_work_dir="core.txt">
+        <data format="txt" name="alignment_summary" label="${tool.name} on ${on_string} core alignment summary" from_work_dir="core.txt">
             <filter>outputs and 'outtxt' in outputs</filter>
         </data>
     </outputs>
@@ -46,9 +49,21 @@
     <tests>
         <test><!-- Test #1 - test with 3 zipped directories -->
             <param name="indirs" value="a.tgz,b.tgz,c.tgz" />
-            <param name="ref" value="reference.fasta" />
+            <conditional name="reference_source">
+                <param name="reference_source_selector" value="history"/>
+                <param name="ref_file" value="reference.fasta" ftype="fasta"/>
+            </conditional>
             <param name="outputs" value="outtxt" />
-            <output name="alignment_summary" ftype="tabular" file="a_b_c.core.txt" />
+            <output name="alignment_summary" ftype="txt" file="a_b_c.core.txt" />
+        </test>
+        <test><!-- Test #2 - test with 3 zipped directories and a cached (built-in) reference -->
+            <param name="indirs" value="a.tgz,b.tgz,c.tgz" />
+            <conditional name="reference_source">
+                <param name="reference_source_selector" value="cached"/>
+                <param name="ref_file" value="test_id"/>
+            </conditional>
+            <param name="outputs" value="outtxt" />
+            <output name="alignment_summary" ftype="txt" file="a_b_c.core.txt" />
         </test>
     </tests>
--- a/snippy.xml	Fri Jun 21 14:38:05 2019 -0400
+++ b/snippy.xml	Sat Jan 25 00:00:54 2020 +0000
@@ -1,15 +1,17 @@
-<tool id="snippy" name="snippy" version="@VERSION@+galaxy3">
+<tool id="snippy" name="snippy" version="@VERSION@+galaxy1">
   <description>
       Snippy finds SNPs between a haploid reference genome and your NGS sequence reads.
-  </description>
-  <macros>
-      <import>macros.xml</import>
-  </macros>
-  <expand macro="requirements" />
-  <expand macro="version_command" />
+    </description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version_command" />

     <command detect_errors="exit_code"><![CDATA[

+        @REFERENCE_SOURCE_FILE@
+
         #import re
         #if str( $fastq_input.fastq_input_selector ) == "paired"
             #set $dir_name = re.sub('[^\w_]', '_', $fastq_input.fastq_input1.element_identifier)
@@ -21,22 +23,11 @@
             #set $dir_name = re.sub('[^\w_]', '_', $fastq_input.fastq_input_interleaved.element_identifier)
         #end if

-        #if $ref.is_of_type("fasta")
-            cp '$ref' 'ref.fna' &&
-        #end if
-        #if $ref.is_of_type("genbank")
-            cp '$ref' 'ref.gbk' &&
-        #end if
         snippy
-            --outdir '$dir_name'
+            --outdir '${dir_name}'
             --cpus \${GALAXY_SLOTS:-1}
             --ram \$((\${GALAXY_MEMORY_MB:-4096}/1024))
-            #if $ref.is_of_type("fasta")
-                --ref 'ref.fna'
-            #end if
-            #if $ref.is_of_type("genbank")
-                --ref 'ref.gbk'
-            #end if
+            @REFERENCE_COMMAND@
             --mapqual $adv.mapqual
             --mincov $adv.mincov
             --minfrac $adv.minfrac
@@ -62,20 +53,16 @@

         &&

-        cp -r '$dir_name' 'out' &&
-
-        tar -czf 'out.tgz' '${dir_name}'
         #if "outcon" in str($outputs) and $adv.rename_cons
-          && sed -i 's/>.*/>${dir_name}/' out/snps.consensus.fa
+          sed -i 's/>.*/>${dir_name}/' ${dir_name}/snps.consensus.fa &&  ## trailing &&: a leading one would follow the && that already ends the snippy call
         #end if
-
-
-    ]]></command>
+    
+        mv ${dir_name} out
+        && tar -czf out.tgz out
+    ]]>    </command>

     <inputs>
-
-        <param name="ref" type="data" format="fasta,genbank" label="Reference File (either in fasta or genbank format)" help="Fasta or Genbank file to use as the reference" />
-
+        <expand macro="reference_selector" />
         <conditional name="fastq_input">
             <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
                 <option value="paired">Paired</option>
@@ -116,7 +103,6 @@
             <option value="outlog" selected="False">A log file with the commands run and their outputs</option>
             <option value="outaln" selected="False">A version of the reference but with - at position with depth=0 and N for 0 to depth to --mincov (does not have variants)</option>
             <option value="outcon" selected="False">A version of the reference genome with all variants instantiated</option>
-            <option value="outdep" selected="False">Output of samtools depth for the .bam file</option>
             <option value="outbam" selected="False">The alignments in BAM format. Note that multi-mapping and unmapped reads are not present.</option>
             <option value="outzip" selected="True">Zipped files needed for input into snippy-core</option>
         </param>
@@ -146,13 +132,10 @@
         <data format="fasta" name="snpconsensus" label="${tool.name} on ${on_string} consensus fasta" from_work_dir="out/snps.consensus.fa">
             <filter>outputs and 'outcon' in outputs</filter>
         </data>
-        <data format="tabular" name="snpsdepth" label="${tool.name} on ${on_string} mapping depth" from_work_dir="out/snps.depth">
-            <filter>outputs and 'outdep' in outputs</filter>
-        </data>
         <data format="bam" name="snpsbam" label="${tool.name} on ${on_string} mapped reads (bam)" from_work_dir="out/snps.bam">
             <filter>outputs and 'outbam' in outputs</filter>
         </data>
-        <data format="tar" name="outdir" label="${tool.name} on ${on_string} dir for snippy core" from_work_dir="out.tgz">
+        <data format="zip" name="outdir" label="${tool.name} on ${on_string} dir for snippy core" from_work_dir="out.tgz">
             <filter>outputs and 'outzip' in outputs</filter>
         </data>

@@ -160,8 +143,12 @@

     <tests>

-        <test> <!-- test 0 - fasta ref no snps -->
-            <param name="ref" value="reference.fasta" ftype="fasta" />
+        <test>            <!-- test 0 - fasta ref no snps -->
+            <!-- <param name="ref" value="reference.fasta" ftype="fasta" /> -->
+            <conditional name="reference_source">
+                <param name="reference_source_selector" value="history"/>
+                <param name="ref_file" value="reference.fasta" ftype="fasta"/>
+            </conditional>
             <param name="fastq_input_selector" value="paired" />
             <param name="fastq_input1" ftype="fastqsanger" value="a_1.fastq" />
             <param name="fastq_input2" ftype="fastqsanger" value="a_2.fastq" />
@@ -172,8 +159,11 @@
             <output name="snpgff" ftype="gff3" file="a_fna_ref_mincov_2_minqual_60.snps.gff" />
         </test>

-        <test> <!-- test 1 - fasta ref one snp -->
-            <param name="ref" value="reference.fasta" ftype="fasta" />
+        <test>            <!-- test 1 - fasta ref one snp -->
+            <conditional name="reference_source">
+                <param name="reference_source_selector" value="history"/>
+                <param name="ref_file" value="reference.fasta" ftype="fasta"/>
+            </conditional>
             <param name="fastq_input_selector" value="paired" />
             <param name="fastq_input1" ftype="fastqsanger" value="b_1.fastq" />
             <param name="fastq_input2" ftype="fastqsanger" value="b_2.fastq" />
@@ -184,8 +174,11 @@
             <output name="snpgff" ftype="gff3" file="b_fna_ref_mincov_2_minqual_60.snps.gff" />
         </test>

-        <test> <!-- test 2 - fasta ref one snp paired_collection -->
-            <param name="ref" value="reference.fasta" ftype="fasta" />
+        <test>            <!-- test 2 - fasta ref one snp paired_collection -->
+            <conditional name="reference_source">
+                <param name="reference_source_selector" value="history"/>
+                <param name="ref_file" value="reference.fasta" ftype="fasta"/>
+            </conditional>
             <param name="fastq_input_selector" value="paired_collection" />
             <param name="fastq_input">
                 <collection type="paired">
@@ -200,8 +193,25 @@
             <output name="snpgff" ftype="gff3" file="b_fna_ref_mincov_2_minqual_60.snps.gff" />
         </test>

-        <test> <!-- test 3 - fasta ref one snp single -->
-            <param name="ref" value="reference.fasta" ftype="fasta" />
+        <test>            <!-- test 3 - fasta ref one snp single -->
+            <conditional name="reference_source">
+                <param name="reference_source_selector" value="history"/>
+                <param name="ref_file" value="reference.fasta" ftype="fasta"/>
+            </conditional>
+            <param name="fastq_input_selector" value="single" />
+            <param name="fastq_input_single" value="b_2.fastq" ftype="fastqsanger" />
+            <param name="mincov" value="2" />
+            <param name="minqual" value="60" />
+            <param name="outputs" value="outgff,outsum" />
+            <output name="snpsum" ftype="tabular" file="b_fna_ref_mincov_2_minqual_60.snps.txt" lines_diff="6" />
+            <output name="snpgff" ftype="gff3" file="b_2_fna_ref_mincov_2_minqual_60.snps.gff" />
+        </test>
+
+        <test>            <!-- test 4 - reference source as cached -->
+            <conditional name="reference_source">
+                <param name="reference_source_selector" value="cached"/>
+                <param name="ref_file" value="test_id"/>
+            </conditional>
             <param name="fastq_input_selector" value="single" />
             <param name="fastq_input_single" value="b_2.fastq" ftype="fastqsanger" />
             <param name="mincov" value="2" />
@@ -248,7 +258,7 @@

     For a much more in depth description of snippy and how it works, see https://github.com/tseemann/snippy

-    ]]></help>
-  <expand macro="citations"/>
+    ]]>    </help>
+    <expand macro="citations"/>

 </tool>
--- a/test-data/a_fna_ref_mincov_2_minqual_60.snps.txt	Fri Jun 21 14:38:05 2019 -0400
+++ b/test-data/a_fna_ref_mincov_2_minqual_60.snps.txt	Sat Jan 25 00:00:54 2020 +0000
@@ -2,5 +2,5 @@
 ReadFiles	a_1.fastq a_2.fastq
 Reference	reference.fasta
 ReferenceSize	700
-Software	snippy 4.3.6
+Software	snippy 4.4.5
 VariantTotal	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/all_fasta.loc	Sat Jan 25 00:00:54 2020 +0000
@@ -0,0 +1,20 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3	/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
+test_id	test_dbkey	test display name	${__HERE__}/ref.fna
+
--- a/test-data/b_2_fna_ref_mincov_2_minqual_60.snps.gff	Fri Jun 21 14:38:05 2019 -0400
+++ b/test-data/b_2_fna_ref_mincov_2_minqual_60.snps.gff	Sat Jan 25 00:00:54 2020 +0000
@@ -1,2 +1,2 @@
 ##gff-version 3
-reference	snippy:4.3.6	variation	4	4	.	.	0	note=snp A=>T T:5 A:0
+reference	snippy:4.4.5	variation	4	4	.	.	0	note=snp A=>T T:5 A:0
--- a/test-data/b_fna_ref_mincov_2_minqual_60.snps.gff	Fri Jun 21 14:38:05 2019 -0400
+++ b/test-data/b_fna_ref_mincov_2_minqual_60.snps.gff	Sat Jan 25 00:00:54 2020 +0000
@@ -1,2 +1,2 @@
 ##gff-version 3
-reference	snippy:4.3.6	variation	4	4	.	.	0	note=snp A=>T T:10 A:0
+reference	snippy:4.4.5	variation	4	4	.	.	0	note=snp A=>T T:10 A:0
--- a/test-data/b_fna_ref_mincov_2_minqual_60.snps.txt	Fri Jun 21 14:38:05 2019 -0400
+++ b/test-data/b_fna_ref_mincov_2_minqual_60.snps.txt	Sat Jan 25 00:00:54 2020 +0000
@@ -2,6 +2,6 @@
 ReadFiles	b_1.fastq b_2.fastq
 Reference	reference.fasta
 ReferenceSize	700
-Software	snippy 4.3.6
+Software	snippy 4.4.5
 Variant-SNP	1
 VariantTotal	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Sat Jan 25 00:00:54 2020 +0000
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3	apiMel3	Honeybee (Apis mellifera): apiMel3	/path/to/genome/apiMel3/apiMel3.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/path/to/genome/hg19/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Sat Jan 25 00:00:54 2020 +0000
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Sat Jan 25 00:00:54 2020 +0000
@@ -0,0 +1,8 @@
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_fasta.loc" />
+    </table>
+</tables>