diff albacore_1D.py @ 1:0a4f83207e53 draft

planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/albacore commit 4aa7a76a7b29c425dd89a020979e835d785d3c95-dirty
author jdv
date Wed, 06 Sep 2017 12:12:52 -0400
parents f8e25d69167d
children d561e3f9ccbb
line wrap: on
line diff
--- a/albacore_1D.py	Wed Aug 30 02:47:27 2017 -0400
+++ b/albacore_1D.py	Wed Sep 06 12:12:52 2017 -0400
@@ -7,15 +7,20 @@
 import shutil
 import h5py
 import numpy as np
+import tarfile
+from distutils.util import strtobool
 
 def main():
     tar_file = sys.argv[1]
     out_file = sys.argv[2]
-    threads  = sys.argv[3]
+    out_fmt  = sys.argv[3]
+    demux    = strtobool( sys.argv[4] )
+    threads  = sys.argv[5]
 
     (flowcell, kit) = parse_meta(tar_file)
 
-    subprocess.call(["read_fast5_basecaller.py",
+    subprocess.call(
+        ["read_fast5_basecaller.py",
         "--input", "in_dir",
         "--worker_threads", threads,
         "--save_path", "out_dir",
@@ -23,15 +28,63 @@
         "--kit", kit,
         "--recursive",
         "--files_per_batch_folder", "0",
-        "--output_format", "fastq",
-        "--reads_per_fastq_batch", "999999999" ])
+        "--output_format", out_fmt,
+        "--reads_per_fastq_batch", "999999999" ] +
+        ["--barcoding"] * demux )
+
+    if demux:
+        #check for demuxed albacore output and copy to Galaxy output
+        final_dir = "final"
+        if not os.path.exists(final_dir):
+            os.makedirs(final_dir)
+        dirs = glob.glob("out_dir/workspace/*")
+        for d in dirs:
+
+            if out_fmt == 'fastq':
+                bc = os.path.basename( os.path.normpath( d ) ) + ".fastq"
+                print(d)
+                print(bc)
+                out = os.path.join( final_dir, bc )
+                files = glob.glob( os.path.join( d, "*.fastq") )
+                if len(files) != 1:
+                    raise ValueError('No or multiple FASTQ output files found')
+                found_file = files[0]
+                shutil.copy(found_file, out)
 
-    #check for single albacore output and copy to Galaxy output
-    files = glob.glob("out_dir/workspace/*.fastq")
-    if len(files) != 1:
-        raise ValueError('No or multiple FASTQ output files found')
-    found_file = files[0]
-    shutil.copy(found_file, out_file)
+            elif out_fmt == 'fast5':
+                bc = os.path.basename( os.path.normpath( d ) ) + ".fast5.tar.gz"
+                print(d)
+                print(bc)
+                out = os.path.join( final_dir, bc )
+                files = glob.glob( os.path.join( d, "**", "*.fast5"), recursive=True)
+                if len(files) < 1:
+                    raise ValueError('No FAST5 output files found')
+                tar = tarfile.open(out, 'w:gz')
+                tar.add( d )
+                tar.close()
+
+            else:
+                raise ValueError('Bad output format specified')
+
+    else:
+        if out_fmt == 'fastq':
+            #check for single albacore output and copy to Galaxy output
+            files = glob.glob("out_dir/workspace/*.fastq")
+            if len(files) != 1:
+                raise ValueError('No or multiple FASTQ output files found')
+            found_file = files[0]
+            shutil.copy(found_file, out_file)
+        elif out_fmt == 'fast5':
+            #check for single albacore output and copy to Galaxy output
+            files = glob.glob("out_dir/workspace/**/*.fast5", recursive=True)
+            if len(files) < 1:
+                raise ValueError('No FAST5 output files found')
+            tar = tarfile.open(out_file, 'w:gz')
+            tar.add("out_dir/workspace")
+            tar.close()
+        else:
+            raise ValueError('Bad output format specified')
+
 
 def parse_meta(fn):