Mercurial > repos > jdv > albacore
diff albacore_1D.py @ 1:0a4f83207e53 draft
planemo upload for repository https://github.com/jvolkening/galaxy-tools/tree/master/tools/albacore commit 4aa7a76a7b29c425dd89a020979e835d785d3c95-dirty
author | jdv |
---|---|
date | Wed, 06 Sep 2017 12:12:52 -0400 |
parents | f8e25d69167d |
children | d561e3f9ccbb |
line wrap: on
line diff
```diff
--- a/albacore_1D.py Wed Aug 30 02:47:27 2017 -0400
+++ b/albacore_1D.py Wed Sep 06 12:12:52 2017 -0400
@@ -7,15 +7,20 @@
 import shutil
 import h5py
 import numpy as np
+import tarfile
+from distutils.util import strtobool
 
 def main():
     tar_file = sys.argv[1]
     out_file = sys.argv[2]
-    threads = sys.argv[3]
+    out_fmt = sys.argv[3]
+    demux = strtobool( sys.argv[4] )
+    threads = sys.argv[5]
 
     (flowcell, kit) = parse_meta(tar_file)
 
-    subprocess.call(["read_fast5_basecaller.py",
+    subprocess.call(
+        ["read_fast5_basecaller.py",
         "--input", "in_dir",
         "--worker_threads", threads,
         "--save_path", "out_dir",
@@ -23,15 +28,63 @@
         "--kit", kit,
         "--recursive",
         "--files_per_batch_folder", "0",
-        "--output_format", "fastq",
-        "--reads_per_fastq_batch", "999999999" ])
+        "--output_format", out_fmt,
+        "--reads_per_fastq_batch", "999999999" ]
+        + ["--barcoding"] * demux )
+
+    if demux:
+        #check for demuxed albacore output and copy to Galaxy output
+        final_dir = "final"
+        if not os.path.exists(final_dir):
+            os.makedirs(final_dir)
+        dirs = glob.glob("out_dir/workspace/*")
+        for d in dirs:
+
+            if out_fmt == 'fastq':
+                bc = os.path.basename( os.path.normpath( d ) ) + ".fastq"
+                print(d)
+                print(bc)
+                out = os.path.join( final_dir, bc )
+                files = glob.glob( os.path.join( d, "*.fastq") )
+                if len(files) != 1:
+                    raise ValueError('No or multiple FASTQ output files found')
+                found_file = files[0]
+                shutil.copy(found_file, out)
 
-    #check for single albacore output and copy to Galaxy output
-    files = glob.glob("out_dir/workspace/*.fastq")
-    if len(files) != 1:
-        raise ValueError('No or multiple FASTQ output files found')
-    found_file = files[0]
-    shutil.copy(found_file, out_file)
+            elif out_fmt == 'fast5':
+                bc = os.path.basename( os.path.normpath( d ) ) + ".fast5.tar.gz"
+                print(d)
+                print(bc)
+                out = os.path.join( final_dir, bc )
+                files = glob.glob( os.path.join( d, "**", "*.fast5"), recursive=True)
+                if len(files) < 1:
+                    raise ValueError('No FAST5 output files found')
+                tar = tarfile.open(out, 'w:gz')
+                tar.add( d )
+                tar.close()
+
+            else:
+                raise ValueError('Bad output format specified')
+
+    else:
+        if out_fmt == 'fastq':
+            #check for single albacore output and copy to Galaxy output
+            files = glob.glob("out_dir/workspace/*.fastq")
+            if len(files) != 1:
+                raise ValueError('No or multiple FASTQ output files found')
+            found_file = files[0]
+            shutil.copy(found_file, out_file)
+        elif out_fmt == 'fast5':
+            #check for single albacore output and copy to Galaxy output
+            files = glob.glob("out_dir/workspace/**/*.fast5", recursive=True)
+            if len(files) < 1:
+                raise ValueError('No FAST5 output files found')
+            tar = tarfile.open(out_file, 'w:gz')
+            tar.add("out_dir/workspace")
+            tar.close()
+        else:
+            raise ValueError('Bad output format specified')
+
 
 
 def parse_meta(fn):
```