diff macs21_wrapper.py @ 2:15889783e759 draft

Fix bugs in tool operation and update dependencies.
author pjbriggs
date Thu, 12 Feb 2015 08:29:07 -0500
parents d0986d2be693
children 0c6b14f3fefc
line wrap: on
line diff
--- a/macs21_wrapper.py	Thu Jan 29 11:11:21 2015 -0500
+++ b/macs21_wrapper.py	Thu Feb 12 08:29:07 2015 -0500
@@ -53,6 +53,65 @@
             fp.write( '\t'.join( fields ) )
     fp.close()
 
+def make_bigwig_from_bedgraph(bedgraph_file,bigwig_file,
+                              chrom_sizes,working_dir=None):
+    """Make bigWig file from a bedGraph
+
+    The protocol is:
+
+    $ fetchChromSizes.sh mm9 > mm9.chrom.sizes
+    $ bedClip treat.bedgraph mm9.chrom.sizes treat.clipped
+    $ bedGraphToBigWig treat.clipped mm9.chrom.sizes treat.bw
+
+    Get the binaries from
+    http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/
+
+    We skip the fetchChromSizes step if the 'chrom_sizes'
+    argument supplied a valid file with the chromosome sizes
+    for the genome build in question.
+
+    """
+    print "Generating bigWig from bedGraph..."
+    # Check for chromosome sizes
+    if not os.path.exists(chrom_sizes):
+        # Determine genome build
+        chrom_sizes = os.path.basename(chrom_sizes)
+        genome_build = chrom_sizes.split('.')[0]
+        print "Missing chrom sizes file, attempting to fetch for '%s'" % genome_build
+        # Run fetchChromSizes
+        chrom_sizes = os.path.join(working_dir,chrom_sizes)
+        stderr_file = os.path.join(working_dir,"fetchChromSizes.stderr")
+        cmd = "fetchChromSizes %s" % genome_build
+        print "Running %s" % cmd
+        proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir,
+                                stdout=open(chrom_sizes,'wb'),
+                                stderr=open(stderr_file,'wb'))
+        proc.wait()
+        # Copy stderr from fetchChromSizes for information only
+        for line in open(stderr_file,'r'):
+            print line.strip()
+        # Check that the sizes file was downloaded
+        if not os.path.exists(chrom_sizes):
+            sys.stderr.write("Failed to download chrom sizes for '%s'" % genome_build)
+            sys.exit(1)
+    # Run bedClip
+    treat_clipped = "%s.clipped" % os.path.basename(bedgraph_file)
+    cmd = "bedClip %s %s %s" % (bedgraph_file,chrom_sizes,treat_clipped)
+    print "Running %s" % cmd
+    proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir)
+    proc.wait()
+    # Check that clipped file exists
+    treat_clipped = os.path.join(working_dir,treat_clipped)
+    if not os.path.exists(treat_clipped):
+        sys.stderr.write("Failed to create clipped bed file")
+        sys.exit(1)
+    # Run bedGraphToBigWig
+    cmd = "bedGraphToBigWig %s %s %s" % (treat_clipped,chrom_sizes,
+                                         bigwig_file)
+    print "Running %s" % cmd
+    proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir)
+    proc.wait()
+
 if __name__ == "__main__":
 
     # Echo the command line
@@ -67,10 +126,14 @@
     output_narrowpeaks = None
     output_treat_pileup = None
     output_lambda_bedgraph = None
+    output_bigwig = None
     output_xls_to_interval_peaks_file = None
     output_peaks = None
     output_bdgcmp = None
 
+    # Other initialisations
+    chrom_sizes_file = None
+
     # Build the MACS 2.1 command line
     # Initial arguments are always the same: command & input ChIP-seq file name
     cmdline = ["macs2 %s -t %s" % (sys.argv[1],sys.argv[2])]
@@ -85,6 +148,9 @@
             # Replace whitespace in name with underscores
             experiment_name = '_'.join(arg.split('=')[1].split())
             cmdline.append("--name=%s" % experiment_name)
+        elif arg.startswith('--length='):
+            # Extract chromosome size file
+            chrom_sizes_file = arg.split('=')[1]
         elif arg.startswith('--output-'):
             # Handle destinations for output files
             arg0,filen = arg.split('=')
@@ -104,6 +170,8 @@
                 output_treat_pileup = filen
             elif  arg0 == '--output-lambda-bedgraph':
                 output_lambda_bedgraph = filen
+            elif  arg0 == '--output-bigwig':
+                output_bigwig = filen
             elif  arg0 == '--output-xls-to-interval':
                 output_xls_to_interval_peaks_file = filen
             elif  arg0 == '--output-peaks':
@@ -142,6 +210,13 @@
         if os.path.exists(peaks_xls_file):
             convert_xls_to_interval(peaks_xls_file,output_xls_to_interval_peaks_file,
                                     header='peaks file')
+
+    # Create bigWig from bedGraph, if requested
+    if output_bigwig is not None:
+        treat_bedgraph_file = os.path.join(working_dir,'%s_treat_pileup.bdg' % experiment_name)
+        if os.path.exists(treat_bedgraph_file):
+            make_bigwig_from_bedgraph(treat_bedgraph_file,output_bigwig,
+                                      chrom_sizes_file,working_dir)
         
     # Move MACS2 output files from working dir to their final destinations
     move_file(working_dir,"%s_summits.bed" % experiment_name,output_summits)