diff bam_to_bigwig/bam_to_wiggle.py @ 0:bc71417f212c

Uploaded
author g2cmnty@test-web1.g2.bx.psu.edu
date Fri, 17 Jun 2011 15:25:25 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bam_to_bigwig/bam_to_wiggle.py	Fri Jun 17 15:25:25 2011 -0400
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+"""Convert BAM files to BigWig file format in a specified region.
+
+Usage:
+    bam_to_wiggle.py <BAM file> [<YAML config>]
+    [--outfile=<output file name>
+     --chrom=<chrom>
+     --start=<start>
+     --end=<end>]
+
+chrom start and end are optional, in which case they default to everything.
+
+The config file is in YAML format and specifies the location of the wigToBigWig
+program from UCSC:
+
+program:
+  ucsc_bigwig: wigToBigWig
+
+If not specified, these will be assumed to be present in the system path.
+
+The script requires:
+    pysam (http://code.google.com/p/pysam/)
+    wigToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
+If a configuration file is used, then PyYAML is also required (http://pyyaml.org/)
+"""
+import os
+import sys
+import subprocess
+import tempfile
+from optparse import OptionParser
+from contextlib import contextmanager, closing
+
+import pysam
+
+def main(bam_file, config_file=None, chrom='all', start=0, end=None,
+         outfile=None):
+    if config_file:
+        import yaml
+        with open(config_file) as in_handle:
+            config = yaml.load(in_handle)
+    else:
+        config = {"program": {"ucsc_bigwig" : "wigToBigWig"}}
+    if outfile is None:
+        outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
+    if start > 0:
+        start = int(start) - 1
+    if end is not None:
+        end = int(end)
+    regions = [(chrom, start, end)]
+    if os.path.abspath(bam_file) == os.path.abspath(outfile):
+        sys.stderr.write("Bad arguments, input and output files are the same.\n")
+        sys.exit(1)
+    if not (os.path.exists(outfile) and os.path.getsize(outfile) > 0):
+        #Use a temp file to avoid any possiblity of not having write permission
+        out_handle = tempfile.NamedTemporaryFile(delete=False)
+        wig_file = out_handle.name
+        with closing(out_handle):
+            chr_sizes, wig_valid = write_bam_track(bam_file, regions, config, out_handle)
+        try:
+            if wig_valid:
+                convert_to_bigwig(wig_file, chr_sizes, config, outfile)
+        finally:
+            os.remove(wig_file)
+
+@contextmanager
+def indexed_bam(bam_file, config):
+    if not os.path.exists(bam_file + ".bai"):
+        pysam.index(bam_file)
+    sam_reader = pysam.Samfile(bam_file, "rb")
+    yield sam_reader
+    sam_reader.close()
+
+def write_bam_track(bam_file, regions, config, out_handle):
+    out_handle.write("track %s\n" % " ".join(["type=wiggle_0",
+        "name=%s" % os.path.splitext(os.path.split(bam_file)[-1])[0],
+        "visibility=full",
+        ]))
+    is_valid = False
+    with indexed_bam(bam_file, config) as work_bam:
+        sizes = zip(work_bam.references, work_bam.lengths)
+        if len(regions) == 1 and regions[0][0] == "all":
+            regions = [(name, 0, length) for name, length in sizes]
+        for chrom, start, end in regions:
+            if end is None and chrom in work_bam.references:
+                end = work_bam.lengths[work_bam.references.index(chrom)]
+            assert end is not None, "Could not find %s in header" % chrom
+            out_handle.write("variableStep chrom=%s\n" % chrom)
+            for col in work_bam.pileup(chrom, start, end):
+                out_handle.write("%s %s\n" % (col.pos+1, col.n))
+                is_valid = True
+    return sizes, is_valid
+
+def convert_to_bigwig(wig_file, chr_sizes, config, bw_file=None):
+    if not bw_file:
+        bw_file = "%s.bigwig" % (os.path.splitext(wig_file)[0])
+    size_file = "%s-sizes.txt" % (os.path.splitext(wig_file)[0])
+    with open(size_file, "w") as out_handle:
+        for chrom, size in chr_sizes:
+            out_handle.write("%s\t%s\n" % (chrom, size))
+    try:
+        cl = [config["program"]["ucsc_bigwig"], wig_file, size_file, bw_file]
+        subprocess.check_call(cl)
+    finally:
+        os.remove(size_file)
+    return bw_file
+
+if __name__ == "__main__":
+    parser = OptionParser()
+    parser.add_option("-o", "--outfile", dest="outfile")
+    parser.add_option("-c", "--chrom", dest="chrom")
+    parser.add_option("-s", "--start", dest="start")
+    parser.add_option("-e", "--end", dest="end")
+    (options, args) = parser.parse_args()
+    if len(args) not in [1, 2]:
+        print "Incorrect arguments"
+        print __doc__
+        sys.exit()
+    kwargs = dict(
+        outfile=options.outfile,
+        chrom=options.chrom or 'all',
+        start=options.start or 0,
+        end=options.end)
+    main(*args, **kwargs)