changeset 0:30c2b84b4117 draft

planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/scrappie commit bf5788ad5a3293446a50a3246b44ba09174c9b71
author jdv
date Wed, 30 Aug 2017 02:55:35 -0400
parents
children 52ba9fde200f
files scrappie_raw.py scrappie_raw.xml test-data/test_data.fast5.tar.gz test-data/test_data.fasta
diffstat 4 files changed, 150 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scrappie_raw.py	Wed Aug 30 02:55:35 2017 -0400
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+
+import sys, os
+import glob
+import tarfile
+import subprocess
+import shutil
+import h5py
+import numpy as np
+
+def main():
+    tar_file = sys.argv[1]
+    out_file = sys.argv[2]
+    threads  = sys.argv[3]
+
+    extract_fast5(tar_file)
+    with open(out_file, "w") as outfile:
+        subprocess.call(["scrappie",
+            "raw",
+            "--threads", threads,
+            "--outformat", "fasta",
+            "in_dir" ],
+            stdout=outfile )
+
+def extract_fast5(fn):
+
+    try:
+        in_dir = "in_dir"
+        if not os.path.exists(in_dir):
+            os.makedirs(in_dir)
+
+        tar = tarfile.open(fn, mode='r')
+        tar.extractall(path=in_dir)
+
+        files = glob.glob(
+            os.path.join(in_dir, "**", "*.fast5"),
+            recursive=True
+        )
+        if len(files) < 1:
+            raise ValueError('No FAST5 files found')
+        for f in files:
+            shutil.copy(f, in_dir)
+
+    except OSError as e:
+        print("Unexpected error:", e.strerror)
+        raise
+
+    except:
+        print("Unexpected error:", sys.exc_info()[0])
+        raise
+
+# Run the wrapper only when executed as a script, not when imported.
+if __name__ == "__main__" :
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scrappie_raw.xml	Wed Aug 30 02:55:35 2017 -0400
@@ -0,0 +1,81 @@
+<tool id="scrappie_raw" name="Scrappie (raw)" version="1.0.0">
+
+    <description>ONT development basecaller</description>
+
+    <!-- ***************************************************************** -->
+   
+    <!--
+    <requirements>
+        <requirement type="package" version="1.0.0">scrappie</requirement>
+    </requirements>
+    -->
+
+    <!-- ***************************************************************** -->
+
+    <version_command>scrappie version | perl -wnE'print "$1\n" for /scrappie (.+)/g'</version_command>
+
+    <!-- ***************************************************************** -->
+
+    <!-- Paths are single-quoted so that spaces or shell metacharacters in
+         the tool directory or dataset paths cannot break the command line. -->
+    <command detect_errors="aggressive">
+    <![CDATA[
+
+    python3 '$__tool_directory__/scrappie_raw.py' '$input' '$output' \${GALAXY_SLOTS:-1}
+
+    ]]>
+    </command>
+
+    <!-- ***************************************************************** -->
+
+    <inputs>
+
+        <param name="input" type="data" format="fast5_archive" label="Input reads" />
+
+    </inputs>
+
+    <!-- ***************************************************************** -->
+
+    <outputs>
+
+        <data name="output" format="fasta" label="${tool.name} on ${on_string} (called.fasta)" />
+
+    </outputs>
+
+    <!-- ***************************************************************** -->
+
+    <tests>
+        <!-- multithreaded output is non-deterministic, so simply compare file
+        sizes -->
+        <test>
+            <param name="input" value="test_data.fast5.tar.gz" ftype="fast5_archive" />
+            <output name="output" file="test_data.fasta" compare="sim_size" delta="0" />
+        </test>
+    </tests>
+
+    <!-- ***************************************************************** -->
+
+    <help>
+    <![CDATA[
+
+**Description**
+
+Scrappie provides recurrent neural network basecalling for Oxford Nanopore
+MinION data. It is a technology demonstrator for the Oxford Nanopore
+Research Algorithms group. It is designed for improved calling of
+homopolymers over nanonet. Scrappie is provided unsupported by Oxford
+Nanopore Technologies.
+
+This Galaxy wrapper adapts scrappie to take a gzip-compressed tarball of FAST5
+reads as input, such as can be produced by `poretools combine`, and always
+outputs a single FASTA file.
+
+This is the raw basecaller.
+
+    ]]>
+    </help>
+
+    <!-- ***************************************************************** -->
+    
+    <citations>
+    </citations>
+
+</tool>
Binary file test-data/test_data.fast5.tar.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_data.fasta	Wed Aug 30 02:55:35 2017 -0400
@@ -0,0 +1,16 @@
+>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12068_ch_399_strand  { "normalised_score" : 0.190867,  "nblock" : 3004,  "sequence_length" : 301,  "blocks_per_base" : 9.980066 }
+GCCTGGATCGTGGCGGTGGGCGGCGGAGGCGGAGGCGGCGTGGGCGAGGCGGTGGGCGCCGGCGTGGCGGGCGGTGGGGCCGGGGCGGTGGCGCGGTGGGCGGGCGGGAGTGGGAGGAGGCGGGTGGGAGGCGGCGGAAGCGGGCGGGGCGGGAGGGGTGGAGGCGTGAGGCGGTGATGGGGGGCGCGGTGCAAGTAGGCCGAGGCCGAGAAATGTGGGGAGCATCAGGTGGGCGGGAGGTGGCGGGTGAGGCGTCATGAAAGCCAGGCTCGTGACTCGGGAAGTGATAGCTCAGGAAGCA
+>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12091_ch_399_strand  { "normalised_score" : 0.131128,  "nblock" : 5237,  "sequence_length" : 348,  "blocks_per_base" : 15.048851 }
+TGTGGGTGGGTGGGCGGGCGGTGGTGGCGGTGGGCGGGCGGGCGGGCGGGCGGTGGGGCGGGCGGTGTGGGGGTGGCGGGGCGGGTGGCGTGGGCGGGCGGTGGGGCGGGCGTGGCGTGGGCGGGGTGGGTGGGAGGCGGGAGGCGTGGGCGGGCGGGAGGCGGGTGGCGGGGGTGGGTGGGAGCGGGTGCGGGGGCGGGGAGGTGGGAGGCGGCGCGGGGCGGGAGGCGGGCGGTGGGGAGGCGGGGCGGGTGGGGGCGGGTGGGGAAGGGAGGCGGGCGGGGCGGGATGGAAGGGAGTGGGCAGGGGCCGGGGAGCAGGGGAAGGCAGGGGCAGTGATGGGGGCAG
+>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12153_ch_399_strand  { "normalised_score" : 0.156256,  "nblock" : 3279,  "sequence_length" : 303,  "blocks_per_base" : 10.821782 }
+CTGGTGTGTAGATGGATGTCCAGTGGGCGGGTGACGGGGTGGTGCCCTGGTGGGCGTGGTGATGATGGGTGTGGGTGGTATTCTGATGGAATGAGTGTGGCAGTGGGTGGGTTAGTGGCGGTGTAACGTCGGCGTGTGGCAGGGAATGTTGGTGGTCTGATGACAGGTGGGATGAGTCAATGAGTGGTGTTAGTTGTGAGTTGTGATGATGATGATTTGATAGATGATAATAATGGAGATAGATCAAGTCAATGGGATGACATGGTGTAAGTGTGTTCGGTGTTTGACCAGTTGGGTGATTAT
+>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12175_ch_399_strand  { "normalised_score" : 0.158499,  "nblock" : 6272,  "sequence_length" : 563,  "blocks_per_base" : 11.140320 }
+ACGTGTGGGTCAGATGGGCGGTGGTGAGGCGGGCCGGTGGAGATGGGTGTGGCAGTCGAGGCGTGGTCCTGTCGTGCACGTCGGGCGCCGGTGGCGGAGGCCGGGGGTGGAGGCGGTGGGCTCGTGGGGTGGCTGAGGGGCGGGGCCGGTGGCTGGGGGTGGCGGGCGGTGGGCGGGCGGGCGGGTGGAGGCGGCGTGGTGGGGGCGGTGGGGCGGGCTGGGTGGGCGGGGGTGGGTGGGGCTGGGTGGGTGGGGAGGCGGGCGGTGGGAAGCGGGGTGGGCGGGTGGAAGGGCGGTGGGAGGCGGGAGGCGGGTGAAGGGTGGGAGGCGGAGGTGGGGCGGGGCGGGGTGGCGGGCGTGGGGGGCGGGGTGGGGTGGCGGGCTCACGGTGGCGGGGGCGGAGTGAGGTGGGAGGCGGTGGGGTGGCGGTGGCGGCGGGAGTGGAGTCCGTGCGTGGTGGGGCGGTGTGATCAGTGTAGGTGAGAGCGGGTGCGGGGGGCGTGGGAGGGGTGGCTGGGGTGGGGAGGCGGAGGGCGGTGTGGGGTGACGTGGGCGTGGGCATG
+>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12081_ch_399_strand  { "normalised_score" : 0.134486,  "nblock" : 8572,  "sequence_length" : 409,  "blocks_per_base" : 20.958435 }
+TGGGTGCGTGTGCGGGCGGGTGGTGGAGTGTGGTTGTGGGTGTGGTGGTGACTCGGGCTGTGCTGGTGGCTTGTGGACTCGGGTGGTGGCGGTGGGGCGGAGGTTGGGAATGGGGCGTGGGGGGTGGTGCGTGGGAGGCGGGTGGGGGAGGCGGGTGGAGGGGCGGCGGCGGGAGGCGGAAGGGCGCGGCGGGCGGGTGGGCGGTGGGCGGAAGGGTGGGTGGGGAGGCGGGTGGGCGGGCGGGCGGAGGCGGGGGCGGGCGGGAAGGGCGGGGCGGAGTGGGAAGGGCGGGGCGGAAGGTGGGTGGGACGGGGCGGGGGCGTGGGAGCGGGTGGGCGGGTGGGGCGGGCGGGTGGTGGGAGGCGGGCGGCGGGGCGGGGCGCGCGCGGGTGGGGTGCGGGGTGGGAGC
+>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12113_ch_399_strand  { "normalised_score" : 0.154456,  "nblock" : 5194,  "sequence_length" : 488,  "blocks_per_base" : 10.643442 }
+TATCGTTGGTGGGTGGGCGGTAGATGGTGGGTTGGTGGTGGGTGGGTGTGTTGGTGGTGGATGGTGTTGTGGTGGTGGTGTGGGATGGGGTGCGTGGGTGGGTGGGGTTAATAGATGGGTTGAGTGGTGGGTGTTGTGTGGCGTGGTTGACCGTGAGAGTGGTGGCCGTGGGTTTGGTGGGCTGAGTGAGCAGGCCATGTGTTTTGTGGTGGTGTGGTGGGTAGGGGGGGCGCCGGTGTGATGGTGATGTTTGGCCGTCAGTGTGGGTGGAATGGTGGGCGGGCGGCTGAGTGGGTGCCGGGGTATGGGGTATGAAGTAAACAAAGCTATCTGTTTTGCGCGATGGCTGGCTGGAGTATTAGGGCCGAGTGGCATGGCATAGCTGCATGAGAACCTGGTAGGCGGCCGTCGGGGCGGTGCTCCGATGCTAGCAGATGGGTGGGCATGTGAGGCGTGCATGGGCGCGTGTCCAGGAAGTGTGTTTTGTG
+>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12085_ch_399_strand  { "normalised_score" : 0.133404,  "nblock" : 4509,  "sequence_length" : 287,  "blocks_per_base" : 15.710801 }
+ATCTGGATTGGGTCAGTGGGTGGATGGAGCGGATGGTGTGGTTGGTGGGTGGTGATGGTGGTCGGGGTGGTGGAATGTCGTTGGTGGAGGTTGGGTTGGATGGTGGGCGGTGGGTGGGGCTGGGTGGTGGTGGTGGGTGCGTGGTGCGATGGGGCGTGTTGGGTGGGTGTGTGGGCGGGTGGGCAGTGGGTGGGTGGGTGGCGTGGGCGGTGGGCGGGCCGTGGGCGGGCGGGCGGGTGGGCGGGTGGGCGGGAGCGGGCGGGCGGGCGGGTGGGCGAGCGAGGAAG
+>macs_MacBook_local_20170809_FAH11850_MN21227_sequencing_run_AIV_run_1_84332_read_12103_ch_399_strand  { "normalised_score" : 0.195423,  "nblock" : 4268,  "sequence_length" : 443,  "blocks_per_base" : 9.634312 }
+ATGAATAGTGGGCGGGGGTTAAATGTCGGCTCTGTGGGTGGTGTGGGCAGATGGTGGTGGGAGCTGGGTGGGTTGGTGGGTGGGTGGCATGGTGGCTGTGGGTGTGGTGGAATGGGTGATCAATTAATAAGATGGGGTGGTGTGATATGGAGGGGTGGTGGGGTGGCGGTGACAATGTGGGGAGTGGTGGTGGGAACAGTAGATCAGGAGGGTGAGCACTGCGATGGGTGTTGGCCGTGGGTGATGGTGGGTGGGTGGCGGGGCATGGCGGGTGTTGGGTGGCGTGGGTTGGGCTGGGGCGTGGTGGGCGCGGGTGGGGTGGGCGCGGGTGGGGTGAAGCAGGAAGGGTGGGAGCGGGTGGGTGGAGTGGGGAAGGTGAAAAAGGGTTTGTGGGTGGCGGGGAGTGAGGGTGGAGGTGCGCGGGAAGAAAAGACGGGTGGGTC