Mercurial > repos > pjbriggs > macs21
comparison macs21_wrapper.py @ 2:15889783e759 draft
Fix bugs in tool operation and update dependencies.
author | pjbriggs |
---|---|
date | Thu, 12 Feb 2015 08:29:07 -0500 |
parents | d0986d2be693 |
children | 0c6b14f3fefc |
comparison
equal
deleted
inserted
replaced
1:d0986d2be693 | 2:15889783e759 |
---|---|
51 # "bad" line instead | 51 # "bad" line instead |
52 fields[0] = "#%s" % fields[0] | 52 fields[0] = "#%s" % fields[0] |
53 fp.write( '\t'.join( fields ) ) | 53 fp.write( '\t'.join( fields ) ) |
54 fp.close() | 54 fp.close() |
55 | 55 |
56 def make_bigwig_from_bedgraph(bedgraph_file,bigwig_file, | |
57 chrom_sizes,working_dir=None): | |
58 """Make bigWig file from a bedGraph | |
59 | |
60 The protocol is: | |
61 | |
62 $ fetchChromSizes.sh mm9 > mm9.chrom.sizes | |
63 $ bedClip treat.bedgraph mm9.chrom.sizes treat.clipped | |
64 $ bedGraphToBigWig treat.clipped mm9.chrom.sizes treat.bw | |
65 | |
66 Get the binaries from | |
67 http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/ | |
68 | |
69 We skip the fetchChromSizes step if the 'chrom_sizes' | |
70 argument supplied a valid file with the chromosome sizes | |
71 for the genome build in question. | |
72 | |
73 """ | |
74 print "Generating bigWig from bedGraph..." | |
75 # Check for chromosome sizes | |
76 if not os.path.exists(chrom_sizes): | |
77 # Determine genome build | |
78 chrom_sizes = os.path.basename(chrom_sizes) | |
79 genome_build = chrom_sizes.split('.')[0] | |
80 print "Missing chrom sizes file, attempting to fetch for '%s'" % genome_build | |
81 # Run fetchChromSizes | |
82 chrom_sizes = os.path.join(working_dir,chrom_sizes) | |
83 stderr_file = os.path.join(working_dir,"fetchChromSizes.stderr") | |
84 cmd = "fetchChromSizes %s" % genome_build | |
85 print "Running %s" % cmd | |
86 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir, | |
87 stdout=open(chrom_sizes,'wb'), | |
88 stderr=open(stderr_file,'wb')) | |
89 proc.wait() | |
90 # Copy stderr from fetchChromSizes for information only | |
91 for line in open(stderr_file,'r'): | |
92 print line.strip() | |
93 # Check that the sizes file was downloaded | |
94 if not os.path.exists(chrom_sizes): | |
95 sys.stderr.write("Failed to download chrom sizes for '%s'" % genome_build) | |
96 sys.exit(1) | |
97 # Run bedClip | |
98 treat_clipped = "%s.clipped" % os.path.basename(bedgraph_file) | |
99 cmd = "bedClip %s %s %s" % (bedgraph_file,chrom_sizes,treat_clipped) | |
100 print "Running %s" % cmd | |
101 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) | |
102 proc.wait() | |
103 # Check that clipped file exists | |
104 treat_clipped = os.path.join(working_dir,treat_clipped) | |
105 if not os.path.exists(treat_clipped): | |
106 sys.stderr.write("Failed to create clipped bed file") | |
107 sys.exit(1) | |
108 # Run bedGraphToBigWig | |
109 cmd = "bedGraphToBigWig %s %s %s" % (treat_clipped,chrom_sizes, | |
110 bigwig_file) | |
111 print "Running %s" % cmd | |
112 proc = subprocess.Popen(args=cmd,shell=True,cwd=working_dir) | |
113 proc.wait() | |
114 | |
56 if __name__ == "__main__": | 115 if __name__ == "__main__": |
57 | 116 |
58 # Echo the command line | 117 # Echo the command line |
59 print ' '.join(sys.argv) | 118 print ' '.join(sys.argv) |
60 | 119 |
65 output_broadpeaks = None | 124 output_broadpeaks = None |
66 output_gappedpeaks = None | 125 output_gappedpeaks = None |
67 output_narrowpeaks = None | 126 output_narrowpeaks = None |
68 output_treat_pileup = None | 127 output_treat_pileup = None |
69 output_lambda_bedgraph = None | 128 output_lambda_bedgraph = None |
129 output_bigwig = None | |
70 output_xls_to_interval_peaks_file = None | 130 output_xls_to_interval_peaks_file = None |
71 output_peaks = None | 131 output_peaks = None |
72 output_bdgcmp = None | 132 output_bdgcmp = None |
133 | |
134 # Other initialisations | |
135 chrom_sizes_file = None | |
73 | 136 |
74 # Build the MACS 2.1 command line | 137 # Build the MACS 2.1 command line |
75 # Initial arguments are always the same: command & input ChIP-seq file name | 138 # Initial arguments are always the same: command & input ChIP-seq file name |
76 cmdline = ["macs2 %s -t %s" % (sys.argv[1],sys.argv[2])] | 139 cmdline = ["macs2 %s -t %s" % (sys.argv[1],sys.argv[2])] |
77 | 140 |
83 cmdline.append("--format=%s" % format_) | 146 cmdline.append("--format=%s" % format_) |
84 elif arg.startswith('--name='): | 147 elif arg.startswith('--name='): |
85 # Replace whitespace in name with underscores | 148 # Replace whitespace in name with underscores |
86 experiment_name = '_'.join(arg.split('=')[1].split()) | 149 experiment_name = '_'.join(arg.split('=')[1].split()) |
87 cmdline.append("--name=%s" % experiment_name) | 150 cmdline.append("--name=%s" % experiment_name) |
151 elif arg.startswith('--length='): | |
152 # Extract chromosome size file | |
153 chrom_sizes_file = arg.split('=')[1] | |
88 elif arg.startswith('--output-'): | 154 elif arg.startswith('--output-'): |
89 # Handle destinations for output files | 155 # Handle destinations for output files |
90 arg0,filen = arg.split('=') | 156 arg0,filen = arg.split('=') |
91 if arg0 == '--output-summits': | 157 if arg0 == '--output-summits': |
92 output_summits = filen | 158 output_summits = filen |
102 output_narrowpeaks = filen | 168 output_narrowpeaks = filen |
103 elif arg0 == '--output-pileup': | 169 elif arg0 == '--output-pileup': |
104 output_treat_pileup = filen | 170 output_treat_pileup = filen |
105 elif arg0 == '--output-lambda-bedgraph': | 171 elif arg0 == '--output-lambda-bedgraph': |
106 output_lambda_bedgraph = filen | 172 output_lambda_bedgraph = filen |
173 elif arg0 == '--output-bigwig': | |
174 output_bigwig = filen | |
107 elif arg0 == '--output-xls-to-interval': | 175 elif arg0 == '--output-xls-to-interval': |
108 output_xls_to_interval_peaks_file = filen | 176 output_xls_to_interval_peaks_file = filen |
109 elif arg0 == '--output-peaks': | 177 elif arg0 == '--output-peaks': |
110 output_peaks = filen | 178 output_peaks = filen |
111 else: | 179 else: |
140 if output_xls_to_interval_peaks_file is not None: | 208 if output_xls_to_interval_peaks_file is not None: |
141 peaks_xls_file = os.path.join(working_dir,'%s_peaks.xls' % experiment_name ) | 209 peaks_xls_file = os.path.join(working_dir,'%s_peaks.xls' % experiment_name ) |
142 if os.path.exists(peaks_xls_file): | 210 if os.path.exists(peaks_xls_file): |
143 convert_xls_to_interval(peaks_xls_file,output_xls_to_interval_peaks_file, | 211 convert_xls_to_interval(peaks_xls_file,output_xls_to_interval_peaks_file, |
144 header='peaks file') | 212 header='peaks file') |
213 | |
214 # Create bigWig from bedGraph, if requested | |
215 if output_bigwig is not None: | |
216 treat_bedgraph_file = os.path.join(working_dir,'%s_treat_pileup.bdg' % experiment_name) | |
217 if os.path.exists(treat_bedgraph_file): | |
218 make_bigwig_from_bedgraph(treat_bedgraph_file,output_bigwig, | |
219 chrom_sizes_file,working_dir) | |
145 | 220 |
146 # Move MACS2 output files from working dir to their final destinations | 221 # Move MACS2 output files from working dir to their final destinations |
147 move_file(working_dir,"%s_summits.bed" % experiment_name,output_summits) | 222 move_file(working_dir,"%s_summits.bed" % experiment_name,output_summits) |
148 move_file(working_dir,"%s_peaks.xls" % experiment_name,output_peaks) | 223 move_file(working_dir,"%s_peaks.xls" % experiment_name,output_peaks) |
149 move_file(working_dir,"%s_peaks.narrowPeak" % experiment_name,output_narrowpeaks) | 224 move_file(working_dir,"%s_peaks.narrowPeak" % experiment_name,output_narrowpeaks) |