Mercurial > repos > jdv > albacore
diff albacore_1D.py @ 0:f8e25d69167d draft
planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/albacore commit bf5788ad5a3293446a50a3246b44ba09174c9b71
author | jdv |
---|---|
date | Wed, 30 Aug 2017 02:47:27 -0400 |
parents | |
children | 0a4f83207e53 |
line wrap: on
line diff
#!/usr/bin/env python3
"""Basecall a tarball of FAST5 reads with Albacore and emit one FASTQ file.

Usage: albacore_1D.py <fast5_tarball> <output_fastq> <worker_threads>

The tarball is unpacked into ``in_dir``, the flowcell and sequencing kit are
read from the first FAST5 file found there, ``read_fast5_basecaller.py`` is
run with ``out_dir`` as its save path, and the single FASTQ file it writes to
``out_dir/workspace`` is copied to ``<output_fastq>``.
"""

import sys
import os
import glob
import tarfile
import subprocess
import shutil

import numpy as np

# Working directories, relative to the current directory. Both functions
# below must agree on IN_DIR, so it is defined once here.
IN_DIR = "in_dir"
OUT_DIR = "out_dir"


def main():
    """Run the basecaller on the archive named on the command line.

    Reads ``sys.argv[1:4]`` as (input tarball, output FASTQ path, number of
    worker threads).

    Raises:
        SystemExit: if fewer than three arguments were given.
        subprocess.CalledProcessError: if the basecaller exits non-zero.
        ValueError: if the basecaller did not leave exactly one FASTQ file.
    """
    if len(sys.argv) < 4:
        sys.exit("Usage: albacore_1D.py <fast5_tarball> <output_fastq> <threads>")
    tar_file, out_file, threads = sys.argv[1:4]

    flowcell, kit = parse_meta(tar_file)

    # check_call (not call): a failed basecaller run must surface as such
    # instead of as a confusing "no FASTQ found" error further down.
    subprocess.check_call([
        "read_fast5_basecaller.py",
        "--input", IN_DIR,
        "--worker_threads", threads,
        "--save_path", OUT_DIR,
        "--flowcell", flowcell,
        "--kit", kit,
        "--recursive",
        "--files_per_batch_folder", "0",
        "--output_format", "fastq",
        "--reads_per_fastq_batch", "999999999",
    ])

    # check for single albacore output and copy to Galaxy output
    files = glob.glob(os.path.join(OUT_DIR, "workspace", "*.fastq"))
    if len(files) != 1:
        raise ValueError('No or multiple FASTQ output files found')
    shutil.copy(files[0], out_file)


def _extract_archive(fn, dest):
    """Unpack tar archive *fn* into *dest*, refusing members that escape it."""
    with tarfile.open(fn, mode='r') as tar:
        if hasattr(tarfile, "data_filter"):
            # Interpreters with extraction filters: let the stdlib reject
            # path traversal, absolute paths, unsafe links and device nodes.
            tar.extractall(path=dest, filter="data")
            return
        # Older interpreters: at least reject members whose path resolves
        # outside the destination (links are not inspected here).
        root = os.path.realpath(dest)
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(root, member.name))
            if os.path.commonpath([root, target]) != root:
                raise ValueError('Unsafe path in archive: ' + member.name)
        tar.extractall(path=dest)


def _as_text(value):
    """Return *value* as ``str``, decoding it first if it is a byte string."""
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return str(value)


def parse_meta(fn):
    """Unpack the FAST5 tarball and read the run's flowcell and kit.

    The archive *fn* is extracted into ``in_dir`` (created if missing) and
    the ``flowcell`` and ``sequencing_kit`` attributes are read from
    ``/UniqueGlobalKey/context_tags`` of the first ``*.fast5`` file found
    anywhere below it.

    Args:
        fn: path to the tar archive containing FAST5 files.

    Returns:
        Tuple ``(flowcell, kit)``, both upper-cased.

    Raises:
        ValueError: if the archive contains no ``*.fast5`` file.
        Any error from unpacking or reading is printed and re-raised.
    """
    try:
        os.makedirs(IN_DIR, exist_ok=True)
        _extract_archive(fn, IN_DIR)

        files = glob.glob(
            os.path.join(IN_DIR, "**", "*.fast5"),
            recursive=True
        )
        if len(files) < 1:
            raise ValueError('No FAST5 files found')
        test_file = files[0]

        # h5py is only needed from here on; importing it lazily keeps the
        # archive handling usable where the HDF5 stack is not installed.
        import h5py

        with h5py.File(test_file, "r") as f:
            tags = f["/UniqueGlobalKey/context_tags"].attrs
            # Attribute values may come back as bytes; normalise to str.
            flowcell = _as_text(tags["flowcell"]).upper()
            kit = _as_text(tags["sequencing_kit"]).upper()
    except OSError as e:
        # strerror is None for OSErrors raised without an errno; fall back
        # to the exception itself so the message is never just "None".
        print("Unexpected error:", e.strerror or e)
        raise
    except Exception:
        print("Unexpected error:", sys.exc_info()[0])
        raise

    return flowcell, kit


if __name__ == "__main__":
    main()