annotate bam_to_bigwig/bam_to_wiggle.py @ 0:bc71417f212c

Uploaded
author g2cmnty@test-web1.g2.bx.psu.edu
date Fri, 17 Jun 2011 15:25:25 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
1 #!/usr/bin/env python
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
2 """Convert BAM files to BigWig file format in a specified region.
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
3
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
4 Usage:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
5 bam_to_wiggle.py <BAM file> [<YAML config>]
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
6 [--outfile=<output file name>
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
7 --chrom=<chrom>
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
8 --start=<start>
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
9 --end=<end>]
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
10
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
11 chrom start and end are optional, in which case they default to everything.
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
12
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
13 The config file is in YAML format and specifies the location of the wigToBigWig
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
14 program from UCSC:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
15
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
16 program:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
17 ucsc_bigwig: wigToBigWig
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
18
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
19 If not specified, these will be assumed to be present in the system path.
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
20
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
21 The script requires:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
22 pysam (http://code.google.com/p/pysam/)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
23 wigToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
24 If a configuration file is used, then PyYAML is also required (http://pyyaml.org/)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
25 """
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
26 import os
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
27 import sys
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
28 import subprocess
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
29 import tempfile
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
30 from optparse import OptionParser
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
31 from contextlib import contextmanager, closing
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
32
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
33 import pysam
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
34
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
35 def main(bam_file, config_file=None, chrom='all', start=0, end=None,
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
36 outfile=None):
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
37 if config_file:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
38 import yaml
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
39 with open(config_file) as in_handle:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
40 config = yaml.load(in_handle)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
41 else:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
42 config = {"program": {"ucsc_bigwig" : "wigToBigWig"}}
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
43 if outfile is None:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
44 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
45 if start > 0:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
46 start = int(start) - 1
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
47 if end is not None:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
48 end = int(end)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
49 regions = [(chrom, start, end)]
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
50 if os.path.abspath(bam_file) == os.path.abspath(outfile):
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
51 sys.stderr.write("Bad arguments, input and output files are the same.\n")
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
52 sys.exit(1)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
53 if not (os.path.exists(outfile) and os.path.getsize(outfile) > 0):
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
54 #Use a temp file to avoid any possiblity of not having write permission
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
55 out_handle = tempfile.NamedTemporaryFile(delete=False)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
56 wig_file = out_handle.name
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
57 with closing(out_handle):
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
58 chr_sizes, wig_valid = write_bam_track(bam_file, regions, config, out_handle)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
59 try:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
60 if wig_valid:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
61 convert_to_bigwig(wig_file, chr_sizes, config, outfile)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
62 finally:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
63 os.remove(wig_file)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
64
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
65 @contextmanager
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
66 def indexed_bam(bam_file, config):
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
67 if not os.path.exists(bam_file + ".bai"):
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
68 pysam.index(bam_file)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
69 sam_reader = pysam.Samfile(bam_file, "rb")
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
70 yield sam_reader
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
71 sam_reader.close()
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
72
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
73 def write_bam_track(bam_file, regions, config, out_handle):
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
74 out_handle.write("track %s\n" % " ".join(["type=wiggle_0",
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
75 "name=%s" % os.path.splitext(os.path.split(bam_file)[-1])[0],
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
76 "visibility=full",
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
77 ]))
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
78 is_valid = False
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
79 with indexed_bam(bam_file, config) as work_bam:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
80 sizes = zip(work_bam.references, work_bam.lengths)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
81 if len(regions) == 1 and regions[0][0] == "all":
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
82 regions = [(name, 0, length) for name, length in sizes]
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
83 for chrom, start, end in regions:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
84 if end is None and chrom in work_bam.references:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
85 end = work_bam.lengths[work_bam.references.index(chrom)]
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
86 assert end is not None, "Could not find %s in header" % chrom
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
87 out_handle.write("variableStep chrom=%s\n" % chrom)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
88 for col in work_bam.pileup(chrom, start, end):
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
89 out_handle.write("%s %s\n" % (col.pos+1, col.n))
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
90 is_valid = True
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
91 return sizes, is_valid
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
92
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
93 def convert_to_bigwig(wig_file, chr_sizes, config, bw_file=None):
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
94 if not bw_file:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
95 bw_file = "%s.bigwig" % (os.path.splitext(wig_file)[0])
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
96 size_file = "%s-sizes.txt" % (os.path.splitext(wig_file)[0])
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
97 with open(size_file, "w") as out_handle:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
98 for chrom, size in chr_sizes:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
99 out_handle.write("%s\t%s\n" % (chrom, size))
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
100 try:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
101 cl = [config["program"]["ucsc_bigwig"], wig_file, size_file, bw_file]
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
102 subprocess.check_call(cl)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
103 finally:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
104 os.remove(size_file)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
105 return bw_file
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
106
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
107 if __name__ == "__main__":
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
108 parser = OptionParser()
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
109 parser.add_option("-o", "--outfile", dest="outfile")
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
110 parser.add_option("-c", "--chrom", dest="chrom")
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
111 parser.add_option("-s", "--start", dest="start")
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
112 parser.add_option("-e", "--end", dest="end")
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
113 (options, args) = parser.parse_args()
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
114 if len(args) not in [1, 2]:
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
115 print "Incorrect arguments"
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
116 print __doc__
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
117 sys.exit()
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
118 kwargs = dict(
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
119 outfile=options.outfile,
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
120 chrom=options.chrom or 'all',
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
121 start=options.start or 0,
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
122 end=options.end)
bc71417f212c Uploaded
g2cmnty@test-web1.g2.bx.psu.edu
parents:
diff changeset
123 main(*args, **kwargs)