0
|
1 #!/usr/bin/env python
|
|
2 """Convert BAM files to BigWig file format in a specified region.
|
|
3
|
|
4 Usage:
|
|
5 bam_to_wiggle.py <BAM file> [<YAML config>]
|
|
6 [--outfile=<output file name>
|
|
7 --chrom=<chrom>
|
|
8 --start=<start>
|
|
9 --end=<end>]
|
|
10
|
|
11 chrom start and end are optional, in which case they default to everything.
|
|
12
|
|
13 The config file is in YAML format and specifies the location of the wigToBigWig
|
|
14 program from UCSC:
|
|
15
|
|
16 program:
|
|
17 ucsc_bigwig: wigToBigWig
|
|
18
|
|
19 If not specified, these will be assumed to be present in the system path.
|
|
20
|
|
21 The script requires:
|
|
22 pysam (http://code.google.com/p/pysam/)
|
|
23 wigToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
|
|
24 If a configuration file is used, then PyYAML is also required (http://pyyaml.org/)
|
|
25 """
|
|
26 import os
|
|
27 import sys
|
|
28 import subprocess
|
|
29 import tempfile
|
|
30 from optparse import OptionParser
|
|
31 from contextlib import contextmanager, closing
|
|
32
|
|
33 import pysam
|
|
34
|
|
35 def main(bam_file, config_file=None, chrom='all', start=0, end=None,
|
|
36 outfile=None):
|
|
37 if config_file:
|
|
38 import yaml
|
|
39 with open(config_file) as in_handle:
|
|
40 config = yaml.load(in_handle)
|
|
41 else:
|
|
42 config = {"program": {"ucsc_bigwig" : "wigToBigWig"}}
|
|
43 if outfile is None:
|
|
44 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
|
|
45 if start > 0:
|
|
46 start = int(start) - 1
|
|
47 if end is not None:
|
|
48 end = int(end)
|
|
49 regions = [(chrom, start, end)]
|
|
50 if os.path.abspath(bam_file) == os.path.abspath(outfile):
|
|
51 sys.stderr.write("Bad arguments, input and output files are the same.\n")
|
|
52 sys.exit(1)
|
|
53 if not (os.path.exists(outfile) and os.path.getsize(outfile) > 0):
|
|
54 #Use a temp file to avoid any possiblity of not having write permission
|
|
55 out_handle = tempfile.NamedTemporaryFile(delete=False)
|
|
56 wig_file = out_handle.name
|
|
57 with closing(out_handle):
|
|
58 chr_sizes, wig_valid = write_bam_track(bam_file, regions, config, out_handle)
|
|
59 try:
|
|
60 if wig_valid:
|
|
61 convert_to_bigwig(wig_file, chr_sizes, config, outfile)
|
|
62 finally:
|
|
63 os.remove(wig_file)
|
|
64
|
|
65 @contextmanager
|
|
66 def indexed_bam(bam_file, config):
|
|
67 if not os.path.exists(bam_file + ".bai"):
|
|
68 pysam.index(bam_file)
|
|
69 sam_reader = pysam.Samfile(bam_file, "rb")
|
|
70 yield sam_reader
|
|
71 sam_reader.close()
|
|
72
|
|
73 def write_bam_track(bam_file, regions, config, out_handle):
|
|
74 out_handle.write("track %s\n" % " ".join(["type=wiggle_0",
|
|
75 "name=%s" % os.path.splitext(os.path.split(bam_file)[-1])[0],
|
|
76 "visibility=full",
|
|
77 ]))
|
|
78 is_valid = False
|
|
79 with indexed_bam(bam_file, config) as work_bam:
|
|
80 sizes = zip(work_bam.references, work_bam.lengths)
|
|
81 if len(regions) == 1 and regions[0][0] == "all":
|
|
82 regions = [(name, 0, length) for name, length in sizes]
|
|
83 for chrom, start, end in regions:
|
|
84 if end is None and chrom in work_bam.references:
|
|
85 end = work_bam.lengths[work_bam.references.index(chrom)]
|
|
86 assert end is not None, "Could not find %s in header" % chrom
|
|
87 out_handle.write("variableStep chrom=%s\n" % chrom)
|
|
88 for col in work_bam.pileup(chrom, start, end):
|
|
89 out_handle.write("%s %s\n" % (col.pos+1, col.n))
|
|
90 is_valid = True
|
|
91 return sizes, is_valid
|
|
92
|
|
93 def convert_to_bigwig(wig_file, chr_sizes, config, bw_file=None):
|
|
94 if not bw_file:
|
|
95 bw_file = "%s.bigwig" % (os.path.splitext(wig_file)[0])
|
|
96 size_file = "%s-sizes.txt" % (os.path.splitext(wig_file)[0])
|
|
97 with open(size_file, "w") as out_handle:
|
|
98 for chrom, size in chr_sizes:
|
|
99 out_handle.write("%s\t%s\n" % (chrom, size))
|
|
100 try:
|
|
101 cl = [config["program"]["ucsc_bigwig"], wig_file, size_file, bw_file]
|
|
102 subprocess.check_call(cl)
|
|
103 finally:
|
|
104 os.remove(size_file)
|
|
105 return bw_file
|
|
106
|
|
107 if __name__ == "__main__":
|
|
108 parser = OptionParser()
|
|
109 parser.add_option("-o", "--outfile", dest="outfile")
|
|
110 parser.add_option("-c", "--chrom", dest="chrom")
|
|
111 parser.add_option("-s", "--start", dest="start")
|
|
112 parser.add_option("-e", "--end", dest="end")
|
|
113 (options, args) = parser.parse_args()
|
|
114 if len(args) not in [1, 2]:
|
|
115 print "Incorrect arguments"
|
|
116 print __doc__
|
|
117 sys.exit()
|
|
118 kwargs = dict(
|
|
119 outfile=options.outfile,
|
|
120 chrom=options.chrom or 'all',
|
|
121 start=options.start or 0,
|
|
122 end=options.end)
|
|
123 main(*args, **kwargs)
|