comparison macs21_wrapper.py @ 2:15889783e759 draft

Fix bugs in tool operation and update dependencies.
author pjbriggs
date Thu, 12 Feb 2015 08:29:07 -0500
parents d0986d2be693
children 0c6b14f3fefc
comparison
equal deleted inserted replaced
1:d0986d2be693 2:15889783e759
51 # "bad" line instead 51 # "bad" line instead
52 fields[0] = "#%s" % fields[0] 52 fields[0] = "#%s" % fields[0]
53 fp.write( '\t'.join( fields ) ) 53 fp.write( '\t'.join( fields ) )
54 fp.close() 54 fp.close()
55 55
56 def make_bigwig_from_bedgraph(bedgraph_file,bigwig_file,
57 chrom_sizes,working_dir=None):
58 """Make bigWig file from a bedGraph
59
60 The protocol is:
61
62 $ fetchChromSizes.sh mm9 > mm9.chrom.sizes
63 $ bedClip treat.bedgraph mm9.chrom.sizes treat.clipped
64 $ bedGraphToBigWig treat.clipped mm9.chrom.sizes treat.bw
65
66 Get the binaries from
67 http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/
68
69 We skip the fetchChromSizes step if the 'chrom_sizes'
70 argument supplied a valid file with the chromosome sizes
71 for the genome build in question.
72
73 """
74 print "Generating bigWig from bedGraph..."
75 # Check for chromosome sizes
76 if not os.path.exists(chrom_sizes):
77 # Determine genome build
78 chrom_sizes = os.path.basename(chrom_sizes)
79 genome_build = chrom_sizes.split('.')[0]
80 print "Missing chrom sizes file, attempting to fetch for '%s'" % genome_build
81 # Run fetchChromSizes
82 chrom_sizes = os.path.join(working_dir,chrom_sizes)
83 stderr_file = os.path.join(working_dir,"fetchChromSizes.stderr")
84 cmd = "fetchChromSizes %s" % genome_build
85 print "Running %s" % cmd
86 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir,
87 stdout=open(chrom_sizes,'wb'),
88 stderr=open(stderr_file,'wb'))
89 proc.wait()
90 # Copy stderr from fetchChromSizes for information only
91 for line in open(stderr_file,'r'):
92 print line.strip()
93 # Check that the sizes file was downloaded
94 if not os.path.exists(chrom_sizes):
95 sys.stderr.write("Failed to download chrom sizes for '%s'" % genome_build)
96 sys.exit(1)
97 # Run bedClip
98 treat_clipped = "%s.clipped" % os.path.basename(bedgraph_file)
99 cmd = "bedClip %s %s %s" % (bedgraph_file,chrom_sizes,treat_clipped)
100 print "Running %s" % cmd
101 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir)
102 proc.wait()
103 # Check that clipped file exists
104 treat_clipped = os.path.join(working_dir,treat_clipped)
105 if not os.path.exists(treat_clipped):
106 sys.stderr.write("Failed to create clipped bed file")
107 sys.exit(1)
108 # Run bedGraphToBigWig
109 cmd = "bedGraphToBigWig %s %s %s" % (treat_clipped,chrom_sizes,
110 bigwig_file)
111 print "Running %s" % cmd
112 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir)
113 proc.wait()
114
56 if __name__ == "__main__": 115 if __name__ == "__main__":
57 116
58 # Echo the command line 117 # Echo the command line
59 print ' '.join(sys.argv) 118 print ' '.join(sys.argv)
60 119
65 output_broadpeaks = None 124 output_broadpeaks = None
66 output_gappedpeaks = None 125 output_gappedpeaks = None
67 output_narrowpeaks = None 126 output_narrowpeaks = None
68 output_treat_pileup = None 127 output_treat_pileup = None
69 output_lambda_bedgraph = None 128 output_lambda_bedgraph = None
129 output_bigwig = None
70 output_xls_to_interval_peaks_file = None 130 output_xls_to_interval_peaks_file = None
71 output_peaks = None 131 output_peaks = None
72 output_bdgcmp = None 132 output_bdgcmp = None
133
134 # Other initialisations
135 chrom_sizes_file = None
73 136
74 # Build the MACS 2.1 command line 137 # Build the MACS 2.1 command line
75 # Initial arguments are always the same: command & input ChIP-seq file name 138 # Initial arguments are always the same: command & input ChIP-seq file name
76 cmdline = ["macs2 %s -t %s" % (sys.argv[1],sys.argv[2])] 139 cmdline = ["macs2 %s -t %s" % (sys.argv[1],sys.argv[2])]
77 140
83 cmdline.append("--format=%s" % format_) 146 cmdline.append("--format=%s" % format_)
84 elif arg.startswith('--name='): 147 elif arg.startswith('--name='):
85 # Replace whitespace in name with underscores 148 # Replace whitespace in name with underscores
86 experiment_name = '_'.join(arg.split('=')[1].split()) 149 experiment_name = '_'.join(arg.split('=')[1].split())
87 cmdline.append("--name=%s" % experiment_name) 150 cmdline.append("--name=%s" % experiment_name)
151 elif arg.startswith('--length='):
152 # Extract chromosome size file
153 chrom_sizes_file = arg.split('=')[1]
88 elif arg.startswith('--output-'): 154 elif arg.startswith('--output-'):
89 # Handle destinations for output files 155 # Handle destinations for output files
90 arg0,filen = arg.split('=') 156 arg0,filen = arg.split('=')
91 if arg0 == '--output-summits': 157 if arg0 == '--output-summits':
92 output_summits = filen 158 output_summits = filen
102 output_narrowpeaks = filen 168 output_narrowpeaks = filen
103 elif arg0 == '--output-pileup': 169 elif arg0 == '--output-pileup':
104 output_treat_pileup = filen 170 output_treat_pileup = filen
105 elif arg0 == '--output-lambda-bedgraph': 171 elif arg0 == '--output-lambda-bedgraph':
106 output_lambda_bedgraph = filen 172 output_lambda_bedgraph = filen
173 elif arg0 == '--output-bigwig':
174 output_bigwig = filen
107 elif arg0 == '--output-xls-to-interval': 175 elif arg0 == '--output-xls-to-interval':
108 output_xls_to_interval_peaks_file = filen 176 output_xls_to_interval_peaks_file = filen
109 elif arg0 == '--output-peaks': 177 elif arg0 == '--output-peaks':
110 output_peaks = filen 178 output_peaks = filen
111 else: 179 else:
140 if output_xls_to_interval_peaks_file is not None: 208 if output_xls_to_interval_peaks_file is not None:
141 peaks_xls_file = os.path.join(working_dir,'%s_peaks.xls' % experiment_name ) 209 peaks_xls_file = os.path.join(working_dir,'%s_peaks.xls' % experiment_name )
142 if os.path.exists(peaks_xls_file): 210 if os.path.exists(peaks_xls_file):
143 convert_xls_to_interval(peaks_xls_file,output_xls_to_interval_peaks_file, 211 convert_xls_to_interval(peaks_xls_file,output_xls_to_interval_peaks_file,
144 header='peaks file') 212 header='peaks file')
213
214 # Create bigWig from bedGraph, if requested
215 if output_bigwig is not None:
216 treat_bedgraph_file = os.path.join(working_dir,'%s_treat_pileup.bdg' % experiment_name)
217 if os.path.exists(treat_bedgraph_file):
218 make_bigwig_from_bedgraph(treat_bedgraph_file,output_bigwig,
219 chrom_sizes_file,working_dir)
145 220
146 # Move MACS2 output files from working dir to their final destinations 221 # Move MACS2 output files from working dir to their final destinations
147 move_file(working_dir,"%s_summits.bed" % experiment_name,output_summits) 222 move_file(working_dir,"%s_summits.bed" % experiment_name,output_summits)
148 move_file(working_dir,"%s_peaks.xls" % experiment_name,output_peaks) 223 move_file(working_dir,"%s_peaks.xls" % experiment_name,output_peaks)
149 move_file(working_dir,"%s_peaks.narrowPeak" % experiment_name,output_narrowpeaks) 224 move_file(working_dir,"%s_peaks.narrowPeak" % experiment_name,output_narrowpeaks)