Mercurial > repos > devteam > samtools_slice_bam
comparison samtools_slice_bam.py @ 0:8f9af81f2cfe draft
Uploaded tool tarball.
| author | devteam |
|---|---|
| date | Tue, 20 Aug 2013 12:37:32 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:8f9af81f2cfe |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 #Dan Blankenberg | |
| 3 | |
| 4 """ | |
| 5 A wrapper script for slicing a BAM file by provided BED file using SAMTools. | |
| 6 %prog input_filename.sam output_filename.bam | |
| 7 """ | |
| 8 #TODO: Confirm that the sort is necessary e.g. if input regions are out of order | |
| 9 | |
| 10 | |
| 11 import sys, optparse, os, tempfile, subprocess, shutil | |
| 12 | |
| 13 CHUNK_SIZE = 2**20 #1mb | |
| 14 | |
| 15 def cleanup_before_exit( tmp_dir ): | |
| 16 if tmp_dir and os.path.exists( tmp_dir ): | |
| 17 shutil.rmtree( tmp_dir ) | |
| 18 | |
| 19 def __main__(): | |
| 20 #Parse Command Line | |
| 21 parser = optparse.OptionParser() | |
| 22 (options, args) = parser.parse_args() | |
| 23 | |
| 24 assert len( args ) == 4, "Invalid command line: samtools_slice_bam.py input.bam input.bam.bai input.interval output.bam" | |
| 25 input_bam_filename, input_index_filename, input_interval_filename, output_bam_filename = args | |
| 26 | |
| 27 tmp_dir = tempfile.mkdtemp( prefix='tmp-samtools_slice_bam-' ) | |
| 28 | |
| 29 tmp_input_bam_filename = os.path.join( tmp_dir, 'input_bam.bam' ) | |
| 30 os.symlink( input_bam_filename, tmp_input_bam_filename ) | |
| 31 os.symlink( input_index_filename, "%s.bai" % tmp_input_bam_filename ) | |
| 32 | |
| 33 #Slice BAM | |
| 34 unsorted_bam_filename = os.path.join( tmp_dir, 'unsorted.bam' ) | |
| 35 unsorted_stderr_filename = os.path.join( tmp_dir, 'unsorted.stderr' ) | |
| 36 cmd = 'samtools view -b -L "%s" "%s" > "%s"' % ( input_interval_filename, tmp_input_bam_filename, unsorted_bam_filename ) | |
| 37 proc = subprocess.Popen( args=cmd, stderr=open( unsorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir ) | |
| 38 return_code = proc.wait() | |
| 39 if return_code: | |
| 40 stderr_target = sys.stderr | |
| 41 else: | |
| 42 stderr_target = sys.stdout | |
| 43 stderr = open( unsorted_stderr_filename ) | |
| 44 while True: | |
| 45 chunk = stderr.read( CHUNK_SIZE ) | |
| 46 if chunk: | |
| 47 stderr_target.write( chunk ) | |
| 48 else: | |
| 49 break | |
| 50 stderr.close() | |
| 51 | |
| 52 #sort sam, so indexing will not fail | |
| 53 #TODO: confirm if sorting is necessary (is original BAM order maintained, or does the output follow the order of input intervals?) | |
| 54 sorted_stderr_filename = os.path.join( tmp_dir, 'sorted.stderr' ) | |
| 55 sorting_prefix = os.path.join( tmp_dir, 'sorted_bam' ) | |
| 56 cmd = 'samtools sort -o "%s" "%s" > "%s"' % ( unsorted_bam_filename, sorting_prefix, output_bam_filename ) | |
| 57 proc = subprocess.Popen( args=cmd, stderr=open( sorted_stderr_filename, 'wb' ), shell=True, cwd=tmp_dir ) | |
| 58 return_code = proc.wait() | |
| 59 | |
| 60 if return_code: | |
| 61 stderr_target = sys.stderr | |
| 62 else: | |
| 63 stderr_target = sys.stdout | |
| 64 stderr = open( sorted_stderr_filename ) | |
| 65 while True: | |
| 66 chunk = stderr.read( CHUNK_SIZE ) | |
| 67 if chunk: | |
| 68 stderr_target.write( chunk ) | |
| 69 else: | |
| 70 break | |
| 71 stderr.close() | |
| 72 | |
| 73 cleanup_before_exit( tmp_dir ) | |
| 74 | |
| 75 if __name__=="__main__": __main__() |
