diff albacore_1D.py @ 0:f8e25d69167d draft

planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/albacore commit bf5788ad5a3293446a50a3246b44ba09174c9b71
author jdv
date Wed, 30 Aug 2017 02:47:27 -0400
parents
children 0a4f83207e53
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/albacore_1D.py	Wed Aug 30 02:47:27 2017 -0400
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+import sys, os
+import glob
+import tarfile
+import subprocess
+import shutil
+import h5py
+import numpy as np
+
+def main():
+    tar_file = sys.argv[1]
+    out_file = sys.argv[2]
+    threads  = sys.argv[3]
+
+    (flowcell, kit) = parse_meta(tar_file)
+
+    subprocess.call(["read_fast5_basecaller.py",
+        "--input", "in_dir",
+        "--worker_threads", threads,
+        "--save_path", "out_dir",
+        "--flowcell", flowcell,
+        "--kit", kit,
+        "--recursive",
+        "--files_per_batch_folder", "0",
+        "--output_format", "fastq",
+        "--reads_per_fastq_batch", "999999999" ])
+
+    #check for single albacore output and copy to Galaxy output
+    files = glob.glob("out_dir/workspace/*.fastq")
+    if len(files) != 1:
+        raise ValueError('No or multiple FASTQ output files found')
+    found_file = files[0]
+    shutil.copy(found_file, out_file)
+
+def parse_meta(fn):
+
+    try:
+        in_dir = "in_dir"
+        if not os.path.exists(in_dir):
+            os.makedirs(in_dir)
+
+        tar = tarfile.open(fn, mode='r')
+        tar.extractall(path=in_dir)
+
+        files = glob.glob(
+            os.path.join(in_dir, "**", "*.fast5"),
+            recursive=True
+        )
+        if len(files) < 1:
+            raise ValueError('No FAST5 files found')
+        test_file = files[0]
+
+        f = h5py.File(test_file,"r")
+        flowcell = f["/UniqueGlobalKey/context_tags"].attrs["flowcell"].upper()
+        kit = f["/UniqueGlobalKey/context_tags"].attrs["sequencing_kit"].upper()
+    except OSError as e:
+        print("Unexpected error:", e.strerror)
+        raise
+
+    except:
+        print("Unexpected error:", sys.exc_info()[0])
+        raise
+
+    return flowcell, kit
+
+if __name__ == "__main__" :
+    main()