Mercurial > repos > jdv > albacore
view albacore_1D.py @ 8:3dfde840119e draft
planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/albacore commit 8bb4eaadc31e6b986bee59392559a825c782447c-dirty
author | jdv |
---|---|
date | Mon, 18 Jun 2018 12:20:28 -0400 |
parents | ce1fa05ffb6c |
children |
line wrap: on
line source
#!/usr/bin/env python3
"""Run the Albacore 1D basecaller on a tarball of FAST5 reads.

Usage:
    albacore_1D.py <in_tar> <out_file> <fastq|fast5> <demux> <disable_filt> <threads>

The input tarball is unpacked into a temporary directory, the flowcell and
sequencing kit are read from the first FAST5 file found, and
``read_fast5_basecaller.py`` is run on the unpacked reads.  Results are then
copied to ``out_file`` or, when demultiplexing, into the ``final`` directory
as one file per top-level entry of the basecaller workspace.
"""

import sys
import os
import glob
import tarfile
import subprocess
import shutil
from tempfile import mkdtemp

import h5py
import numpy as np

try:
    from distutils.util import strtobool
except ImportError:
    # distutils was removed from the standard library in Python 3.12
    # (PEP 632); provide an equivalent so the CLI flags parse the same way.
    def strtobool(val):
        """Convert a truth-value string to 1 or 0, mirroring distutils."""
        val = val.lower()
        if val in ("y", "yes", "t", "true", "on", "1"):
            return 1
        if val in ("n", "no", "f", "false", "off", "0"):
            return 0
        raise ValueError("invalid truth value %r" % (val,))


_BASECALLER = "read_fast5_basecaller.py"
_OUT_DIR = "out_dir"
_FINAL_DIR = "final"
_VALID_FORMATS = ("fastq", "fast5")
_CONTEXT_TAGS = "/UniqueGlobalKey/context_tags"


def main():
    """Entry point: parse ``sys.argv``, basecall, and collect the output.

    Positional arguments (all required):
        1. path to the input tarball of FAST5 files
        2. path of the output file (used when not demultiplexing)
        3. output format, ``fastq`` or ``fast5``
        4. truth-value string: enable barcode demultiplexing
        5. truth-value string: disable Albacore read filtering
        6. number of worker threads (passed through as a string)

    Raises:
        ValueError: on a bad output format, a bad truth-value string, or
            missing/ambiguous basecaller output.
        subprocess.CalledProcessError: if ``tar`` or the basecaller exits
            with a non-zero status.
    """
    if len(sys.argv) < 7:
        sys.exit(
            "Usage: albacore_1D.py <in_tar> <out_file> <fastq|fast5> "
            "<demux> <disable_filt> <threads>"
        )

    tar_file = sys.argv[1]
    out_file = sys.argv[2]
    out_fmt = sys.argv[3]
    demux = bool(strtobool(sys.argv[4]))
    disable_filt = bool(strtobool(sys.argv[5]))
    threads = sys.argv[6]

    # Reject an unsupported format before doing any expensive work.
    if out_fmt not in _VALID_FORMATS:
        raise ValueError('Bad output format specified')

    tempdir = mkdtemp()
    try:
        flowcell, kit = parse_meta(tar_file, tempdir)
        _run_basecaller(tempdir, threads, flowcell, kit, out_fmt,
                        demux, disable_filt)

        # When a "pass" subdirectory exists, only its contents are collected.
        out_path = os.path.join(_OUT_DIR, "workspace")
        pass_path = os.path.join(out_path, "pass")
        if os.path.exists(pass_path):
            out_path = pass_path

        if demux:
            _collect_demuxed(out_path, out_fmt)
        else:
            _collect_single(out_path, out_fmt, out_file)
    finally:
        # Always remove the unpacked reads, including on failure.
        try:
            shutil.rmtree(tempdir)
        except OSError:
            print("Unable to remove temp directory:", sys.exc_info()[0])
            raise


def _run_basecaller(in_dir, threads, flowcell, kit, out_fmt, demux,
                    disable_filt):
    """Run the Albacore basecaller on ``in_dir``, failing on non-zero exit."""
    cmd = [
        _BASECALLER,
        "--input", in_dir,
        "--worker_threads", threads,
        "--save_path", _OUT_DIR,
        "--flowcell", flowcell,
        "--kit", kit,
        "--recursive",
        "--files_per_batch_folder", "0",
        "--output_format", out_fmt,
        "--reads_per_fastq_batch", "999999999",
    ]
    if demux:
        cmd.append("--barcoding")
    if disable_filt:
        cmd.append("--disable_filtering")
    # check_call so that a failed run is not mistaken for valid output.
    subprocess.check_call(cmd)


def _single_fastq(directory):
    """Return the only ``*.fastq`` file directly inside ``directory``."""
    files = glob.glob(os.path.join(directory, "*.fastq"))
    if len(files) != 1:
        raise ValueError('No or multiple FASTQ output files found')
    return files[0]


def _has_fast5(directory):
    """Return True if any ``*.fast5`` file exists anywhere under ``directory``."""
    pattern = os.path.join(directory, "**", "*.fast5")
    return bool(glob.glob(pattern, recursive=True))


def _write_tarball(dest, source):
    """Write ``source`` (file or directory tree) to gzip tarball ``dest``."""
    with tarfile.open(dest, 'w:gz') as tar:
        tar.add(source)


def _collect_demuxed(out_path, out_fmt):
    """Copy per-barcode basecaller output from ``out_path`` into ``final``.

    Each top-level entry of ``out_path`` yields one output named after the
    entry: ``<name>.fastq`` for FASTQ output, ``<name>.fast5.tar.gz`` for a
    directory of FAST5 files, or ``<name>.tar.gz`` for a lone ``.fast5``
    file.  Other plain files are skipped in FAST5 mode.
    """
    os.makedirs(_FINAL_DIR, exist_ok=True)

    # Sorted only to make the processing (and log) order deterministic.
    for entry in sorted(glob.glob(os.path.join(out_path, "*"))):
        name = os.path.basename(os.path.normpath(entry))

        if out_fmt == 'fastq':
            bc = name + ".fastq"
            print(entry)
            print(bc)
            shutil.copy(_single_fastq(entry), os.path.join(_FINAL_DIR, bc))

        elif out_fmt == 'fast5':
            if os.path.isfile(entry):
                if not entry.endswith('.fast5'):
                    continue
                bc = name + ".tar.gz"
            else:
                bc = name + ".fast5.tar.gz"
                if not _has_fast5(entry):
                    raise ValueError('No FAST5 output files found')
            _write_tarball(os.path.join(_FINAL_DIR, bc), entry)

        else:
            raise ValueError('Bad output format specified')


def _collect_single(out_path, out_fmt, out_file):
    """Copy the non-demultiplexed basecaller output to ``out_file``."""
    if out_fmt == 'fastq':
        shutil.copy(_single_fastq(out_path), out_file)
    elif out_fmt == 'fast5':
        if not _has_fast5(out_path):
            raise ValueError('No FAST5 output files found')
        _write_tarball(out_file, out_path)
    else:
        raise ValueError('Bad output format specified')


def _read_tag(h5, names, error):
    """Return the first readable context tag among ``names``, upper-cased.

    Raises ``ValueError(error)`` if none of the attributes can be read.
    """
    for name in names:
        try:
            value = h5[_CONTEXT_TAGS].attrs[name]
            # NOTE(review): some h5py versions appear to return bytes for
            # string attributes; normalise to text -- confirm against the
            # deployed h5py version.
            if isinstance(value, bytes):
                value = value.decode()
            return value.upper()
        except (KeyError, AttributeError):
            continue
    raise ValueError(error)


def parse_meta(fn, in_dir):
    """Unpack tarball ``fn`` into ``in_dir`` and read the run metadata.

    The flowcell type and sequencing kit are taken from the context tags of
    the first FAST5 file found under ``in_dir``.

    Returns:
        A ``(flowcell, kit)`` tuple of upper-cased strings.

    Raises:
        ValueError: if no FAST5 file or a required attribute is found.
        subprocess.CalledProcessError: if ``tar`` exits with non-zero status.
        OSError: on file access errors (e.g. ``tar`` is not installed).
    """
    try:
        # python's tarfile interface does not sanitize file paths within
        # tarballs, which can be a big security risk. GNU tar does sanitize by
        # default, so it's easier/safer here just to call the system tar
        subprocess.check_call([
            "tar",
            "--warning=no-unknown-keyword",
            "-xf", fn,
            "-C", in_dir,
        ])

        files = glob.glob(os.path.join(in_dir, "**", "*.fast5"),
                          recursive=True)
        if not files:
            raise ValueError('No FAST5 files found')

        # The context manager guarantees the HDF5 handle is closed.
        with h5py.File(files[0], "r") as f:
            flowcell = _read_tag(
                f, ("flowcell", "flowcell_type"),
                'No attribute found for flowcell type')
            kit = _read_tag(
                f, ("sequencing_kit",),
                'No attribute found for sequencing kit')

    except OSError as e:
        print("Unexpected error:", e.strerror)
        raise
    except Exception:
        print("Unexpected error:", sys.exc_info()[0])
        raise

    return flowcell, kit


if __name__ == "__main__":
    main()