Mercurial > repos > pjbriggs > macs21
diff macs21_wrapper.py @ 2:15889783e759 draft
Fix bugs in tool operation and update dependencies.
author | pjbriggs |
---|---|
date | Thu, 12 Feb 2015 08:29:07 -0500 |
parents | d0986d2be693 |
children | 0c6b14f3fefc |
line wrap: on
line diff
--- a/macs21_wrapper.py Thu Jan 29 11:11:21 2015 -0500 +++ b/macs21_wrapper.py Thu Feb 12 08:29:07 2015 -0500 @@ -53,6 +53,65 @@ fp.write( '\t'.join( fields ) ) fp.close() +def make_bigwig_from_bedgraph(bedgraph_file,bigwig_file, + chrom_sizes,working_dir=None): + """Make bigWig file from a bedGraph + + The protocol is: + + $ fetchChromSizes.sh mm9 > mm9.chrom.sizes + $ bedClip treat.bedgraph mm9.chrom.sizes treat.clipped + $ bedGraphToBigWig treat.clipped mm9.chrom.sizes treat.bw + + Get the binaries from + http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/ + + We skip the fetchChromSizes step if the 'chrom_sizes' + argument supplied a valid file with the chromosome sizes + for the genome build in question. + + """ + print "Generating bigWig from bedGraph..." + # Check for chromosome sizes + if not os.path.exists(chrom_sizes): + # Determine genome build + chrom_sizes = os.path.basename(chrom_sizes) + genome_build = chrom_sizes.split('.')[0] + print "Missing chrom sizes file, attempting to fetch for '%s'" % genome_build + # Run fetchChromSizes + chrom_sizes = os.path.join(working_dir,chrom_sizes) + stderr_file = os.path.join(working_dir,"fetchChromSizes.stderr") + cmd = "fetchChromSizes %s" % genome_build + print "Running %s" % cmd + proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir, + stdout=open(chrom_sizes,'wb'), + stderr=open(stderr_file,'wb')) + proc.wait() + # Copy stderr from fetchChromSizes for information only + for line in open(stderr_file,'r'): + print line.strip() + # Check that the sizes file was downloaded + if not os.path.exists(chrom_sizes): + sys.stderr.write("Failed to download chrom sizes for '%s'" % genome_build) + sys.exit(1) + # Run bedClip + treat_clipped = "%s.clipped" % os.path.basename(bedgraph_file) + cmd = "bedClip %s %s %s" % (bedgraph_file,chrom_sizes,treat_clipped) + print "Running %s" % cmd + proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) + proc.wait() + # Check that clipped file exists + treat_clipped = os.path.join(working_dir,treat_clipped) + if not os.path.exists(treat_clipped): + sys.stderr.write("Failed to create clipped bed file") + sys.exit(1) + # Run bedGraphToBigWig + cmd = "bedGraphToBigWig %s %s %s" % (treat_clipped,chrom_sizes, + bigwig_file) + print "Running %s" % cmd + proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) + proc.wait() + if __name__ == "__main__": # Echo the command line @@ -67,10 +126,14 @@ output_narrowpeaks = None output_treat_pileup = None output_lambda_bedgraph = None + output_bigwig = None output_xls_to_interval_peaks_file = None output_peaks = None output_bdgcmp = None + # Other initialisations + chrom_sizes_file = None + # Build the MACS 2.1 command line # Initial arguments are always the same: command & input ChIP-seq file name cmdline = ["macs2 %s -t %s" % (sys.argv[1],sys.argv[2])] @@ -85,6 +148,9 @@ # Replace whitespace in name with underscores experiment_name = '_'.join(arg.split('=')[1].split()) cmdline.append("--name=%s" % experiment_name) + elif arg.startswith('--length='): + # Extract chromosome size file + chrom_sizes_file = arg.split('=')[1] elif arg.startswith('--output-'): # Handle destinations for output files arg0,filen = arg.split('=') @@ -104,6 +170,8 @@ output_treat_pileup = filen elif arg0 == '--output-lambda-bedgraph': output_lambda_bedgraph = filen + elif arg0 == '--output-bigwig': + output_bigwig = filen elif arg0 == '--output-xls-to-interval': output_xls_to_interval_peaks_file = filen elif arg0 == '--output-peaks': @@ -142,6 +210,13 @@ if os.path.exists(peaks_xls_file): convert_xls_to_interval(peaks_xls_file,output_xls_to_interval_peaks_file, header='peaks file') + + # Create bigWig from bedGraph, if requested + if output_bigwig is not None: + treat_bedgraph_file = os.path.join(working_dir,'%s_treat_pileup.bdg' % experiment_name) + if os.path.exists(treat_bedgraph_file): + make_bigwig_from_bedgraph(treat_bedgraph_file,output_bigwig, + chrom_sizes_file,working_dir) # Move MACS2 output files from working dir to their final destinations move_file(working_dir,"%s_summits.bed" % experiment_name,output_summits)