comparison albacore_1D.py @ 1:0a4f83207e53 draft

planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/albacore commit 4aa7a76a7b29c425dd89a020979e835d785d3c95-dirty
author jdv
date Wed, 06 Sep 2017 12:12:52 -0400
parents f8e25d69167d
children d561e3f9ccbb
comparison
equal deleted inserted replaced
0:f8e25d69167d 1:0a4f83207e53
5 import tarfile 5 import tarfile
6 import subprocess 6 import subprocess
7 import shutil 7 import shutil
8 import h5py 8 import h5py
9 import numpy as np 9 import numpy as np
10 import tarfile
11 from distutils.util import strtobool
10 12
11 def main(): 13 def main():
12 tar_file = sys.argv[1] 14 tar_file = sys.argv[1]
13 out_file = sys.argv[2] 15 out_file = sys.argv[2]
14 threads = sys.argv[3] 16 out_fmt = sys.argv[3]
17 demux = strtobool( sys.argv[4] )
18 threads = sys.argv[5]
15 19
16 (flowcell, kit) = parse_meta(tar_file) 20 (flowcell, kit) = parse_meta(tar_file)
17 21
18 subprocess.call(["read_fast5_basecaller.py", 22 subprocess.call(
23 ["read_fast5_basecaller.py",
19 "--input", "in_dir", 24 "--input", "in_dir",
20 "--worker_threads", threads, 25 "--worker_threads", threads,
21 "--save_path", "out_dir", 26 "--save_path", "out_dir",
22 "--flowcell", flowcell, 27 "--flowcell", flowcell,
23 "--kit", kit, 28 "--kit", kit,
24 "--recursive", 29 "--recursive",
25 "--files_per_batch_folder", "0", 30 "--files_per_batch_folder", "0",
26 "--output_format", "fastq", 31 "--output_format", out_fmt,
27 "--reads_per_fastq_batch", "999999999" ]) 32 "--reads_per_fastq_batch", "999999999" ] +
33 ["--barcoding"] * demux )
28 34
29 #check for single albacore output and copy to Galaxy output 35 if demux:
30 files = glob.glob("out_dir/workspace/*.fastq") 36 #check for demuxed albacore output and copy to Galaxy output
31 if len(files) != 1: 37 final_dir = "final"
32 raise ValueError('No or multiple FASTQ output files found') 38 if not os.path.exists(final_dir):
33 found_file = files[0] 39 os.makedirs(final_dir)
34 shutil.copy(found_file, out_file) 40 dirs = glob.glob("out_dir/workspace/*")
41 for d in dirs:
42
43 if out_fmt == 'fastq':
44 bc = os.path.basename( os.path.normpath( d ) ) + ".fastq"
45 print(d)
46 print(bc)
47 out = os.path.join( final_dir, bc )
48 files = glob.glob( os.path.join( d, "*.fastq") )
49 if len(files) != 1:
50 raise ValueError('No or multiple FASTQ output files found')
51 found_file = files[0]
52 shutil.copy(found_file, out)
53
54 elif out_fmt == 'fast5':
55 bc = os.path.basename( os.path.normpath( d ) ) + ".fast5.tar.gz"
56 print(d)
57 print(bc)
58 out = os.path.join( final_dir, bc )
59 files = glob.glob( os.path.join( d, "**", "*.fast5"), recursive=True)
60 if len(files) < 1:
61 raise ValueError('No FAST5 output files found')
62 tar = tarfile.open(out, 'w:gz')
63 tar.add( d )
64 tar.close()
65
66 else:
67 raise ValueError('Bad output format specified')
68
69 else:
70 if out_fmt == 'fastq':
71 #check for single albacore output and copy to Galaxy output
72 files = glob.glob("out_dir/workspace/*.fastq")
73 if len(files) != 1:
74 raise ValueError('No or multiple FASTQ output files found')
75 found_file = files[0]
76 shutil.copy(found_file, out_file)
77 elif out_fmt == 'fast5':
78 #check for single albacore output and copy to Galaxy output
79 files = glob.glob("out_dir/workspace/**/*.fast5", recursive=True)
80 if len(files) < 1:
81 raise ValueError('No FAST5 output files found')
82 tar = tarfile.open(out_file, 'w:gz')
83 tar.add("out_dir/workspace")
84 tar.close()
85 else:
86 raise ValueError('Bad output format specified')
87
35 88
36 def parse_meta(fn): 89 def parse_meta(fn):
37 90
38 try: 91 try:
39 in_dir = "in_dir" 92 in_dir = "in_dir"