|
2
|
1 import argparse
|
|
|
2 import os
|
|
|
3 import shutil
|
|
|
4 import subprocess
|
|
|
5 import sys
|
|
|
6 import tempfile
|
|
|
7
|
|
|
8 BUFF_SIZE = 1048576
|
|
|
9 DESIGN_FILE = 'design.tabular'
|
|
|
10
|
|
|
11 parser = argparse.ArgumentParser()
|
|
|
12 parser.add_argument('--all_events_table', dest='all_events_table', help='Output all events table file')
|
|
|
13 parser.add_argument('--alphascale', dest='alphascale', type=float, default=None, help='Alpha scaling factor')
|
|
|
14 parser.add_argument('--chrom_len_file', dest='chrom_len_file', help='File listing the lengths of all chromosomes')
|
|
|
15 parser.add_argument('--control', dest='control', default=None, help='Input control files and data formats')
|
|
|
16 parser.add_argument('--diffp', dest='diffp', type=float, default=None, help='Minimum p-value for reporting differential enrichment')
|
|
|
17 parser.add_argument('--edgerod', dest='edgerod', type=float, default=None, help='EdgeR over-dispersion parameter value')
|
|
|
18 parser.add_argument('--exclude', dest='exclude', default=None, help='File containing a set of regions to ignore during MultiGPS training')
|
|
|
19 parser.add_argument('--expt', dest='expt', default=None, help="Input expt files and data formats")
|
|
|
20 parser.add_argument('--eventsaretxt', dest='eventsaretxt', default=None, help='Append a .txt extension to the events file for browser rendering')
|
|
|
21 parser.add_argument('--fixedalpha', dest='fixedalpha', type=int, default=None, help='Impose this alpha')
|
|
|
22 parser.add_argument('--fixedmodelrange', dest='fixedmodelrange', default=None, help='Keep binding model range fixed to inital size')
|
|
|
23 parser.add_argument('--fixedpb', dest='fixedpb', type=int, default=None, help='Fixed per-base limit')
|
|
|
24 parser.add_argument('--fixedscaling', dest='fixedscaling', type=float, default=None, help='Multiply control counts by total tag count ratio and then by this factor')
|
|
|
25 parser.add_argument('--format', dest='format', default=None, help='Input expt file data format')
|
|
|
26 parser.add_argument('--gaussmodelsmoothing', dest='gaussmodelsmoothing', default=None, help='Use Gaussian model smoothing')
|
|
|
27 parser.add_argument('--gausssmoothparam', dest='gausssmoothparam', type=int, default=None, help='Smoothing factor')
|
|
|
28 parser.add_argument('--input_item', dest='input_items', action='append', nargs=7, default=None, help="Input files, attributes and options")
|
|
|
29 parser.add_argument('--jointinmodel', dest='jointinmodel', default=None, help='Allow joint events in model updates')
|
|
|
30 parser.add_argument('--mappability', dest='mappability', type=float, default=None, help='Fraction of the genome that is mappable for these experiments')
|
|
|
31 parser.add_argument('--maxtrainingrounds', dest='maxtrainingrounds', type=int, default=None, help='Maximum number of training rounds for updating binding event read distributions')
|
|
|
32 parser.add_argument('--medianscale', dest='medianscale', default=None, help='Use the median signal/control ratio as the scaling factor')
|
|
|
33 parser.add_argument('--meme1proc', dest='meme1proc', default=None, help='Do not run the parallel version of meme')
|
|
|
34 parser.add_argument('--mememaxw', dest='mememaxw', type=int, default=None, help='Maximum motif width for MEME')
|
|
|
35 parser.add_argument('--mememinw', dest='mememinw', type=int, default=None, help='Minimum motif width for MEME')
|
|
|
36 parser.add_argument('--memenmotifs', dest='memenmotifs', type=int, default=None, help='Number of motifs MEME should find for each condition')
|
|
|
37 parser.add_argument('--minfold', dest='minfold', type=float, default=None, help='Minimum event fold-change vs scaled control')
|
|
|
38 parser.add_argument('--minqvalue', dest='minqvalue', type=float, default=None, help='Minimum Q-value (corrected p-value) of reported binding events')
|
|
|
39 parser.add_argument('--minmodelupdateevents', dest='minmodelupdateevents', type=int, default=None, help='Minimum number of events to support an update of the read distribution')
|
|
|
40 parser.add_argument('--mlconfignotshared', dest='mlconfignotshared', default=None, help='Share component configs in the ML step')
|
|
|
41 parser.add_argument('--nocache', dest='nocache', default=None, help='Turn off caching of the entire set of experiments')
|
|
|
42 parser.add_argument('--nodifftests', dest='nodifftests', default=None, help='Run differential enrichment tests')
|
|
|
43 parser.add_argument('--nomodelsmoothing', dest='nomodelsmoothing', default=None, help='Perform binding model smoothing')
|
|
|
44 parser.add_argument('--nomodelupdate', dest='nomodelupdate', default=None, help='Perform binding model updates')
|
|
|
45 parser.add_argument('--nomotifprior', dest='nomotifprior', default=None, help='Perform motif-finding only')
|
|
|
46 parser.add_argument('--nomotifs', dest='nomotifs', default=None, help='Perform motif-finding and motif priors')
|
|
|
47 parser.add_argument('--nonunique', dest='nonunique', default=None, help='Use non-unique reads')
|
|
|
48 parser.add_argument('--noposprior', dest='noposprior', default=None, help='Perform inter-experiment positional prior')
|
|
|
49 parser.add_argument('--noscaling', dest='noscaling', default=None, help='Do not use signal vs control scaling')
|
|
|
50 parser.add_argument('--output_bed', dest='output_bed', help='Output bed results file')
|
|
|
51 parser.add_argument('--output_html', dest='output_html', help='Output html results file')
|
|
|
52 parser.add_argument('--output_html_files_path', dest='output_html_files_path', help='Output html extra files')
|
|
|
53 parser.add_argument('--plotscaling', dest='plotscaling', default=None, help='Plot diagnostic information for the chosen scaling method')
|
|
|
54 parser.add_argument('--poissongausspb', dest='poissongausspb', type=int, default=None, help='Poisson threshold for filtering per base')
|
|
|
55 parser.add_argument('--prlogconf', dest='prlogconf', type=int, default=None, help='Poisson log threshold for potential region scanning')
|
|
|
56 parser.add_argument('--probshared', dest='probshared', type=float, default=None, help='Probability that events are shared across conditions')
|
|
|
57 parser.add_argument('--readdistributionfile', dest='readdistributionfile', default=None, help='Optional binding event read distribution file for initializing models')
|
|
|
58 parser.add_argument('--regressionscale', dest='regressionscale', default=None, help='Use scaling by regression on binned tag counts')
|
|
|
59 parser.add_argument('--replicates_counts', dest='replicates_counts', help='Output replicates counts file')
|
|
|
60 parser.add_argument('--scalewin', dest='scalewin', type=int, default=None, help='Window size for estimating scaling ratios')
|
|
|
61 parser.add_argument('--seq', dest='seq', default=None, help='Reference genome path')
|
|
|
62 parser.add_argument('--sesscale', dest='sesscale', default=None, help='Estimate scaling factor by SES')
|
|
|
63 parser.add_argument('--splinesmoothparam', dest='splinesmoothparam', type=int, default=None, help='Spline smoothing parameter')
|
|
|
64 parser.add_argument('--threads', dest='threads', type=int, default=4, help='The number of threads to run')
|
|
|
65 args = parser.parse_args()
|
|
|
66
|
|
|
67
|
|
|
68 def generate_design_file(input_items):
|
|
|
69 design_file = open(DESIGN_FILE, 'w')
|
|
|
70 for item in input_items:
|
|
|
71 file_name, label, file_format, condition_name, replicate_name, experiment_type, fixed_read_count = item
|
|
|
72 file_format = file_format.upper()
|
|
|
73 items = [file_name, label, file_format, condition_name]
|
|
|
74 if replicate_name not in ['None', None, '']:
|
|
|
75 items.append(replicate_name)
|
|
|
76 if experiment_type not in ['None', None, '']:
|
|
|
77 items.append(experiment_type)
|
|
|
78 if fixed_read_count not in ['None', None, '']:
|
|
|
79 items.append(fixed_read_count)
|
|
|
80 design_file.write('%s\n' % '\t'.join(items))
|
|
|
81 design_file.close()
|
|
|
82
|
|
|
83
|
|
|
84 def get_file_with_extension(dir, ext):
|
|
|
85 file_list = [f for f in os.listdir(dir) if f.endswith(ext)]
|
|
|
86 if len(file_list) == 1:
|
|
|
87 return file_list[0]
|
|
|
88 stop_err('Error running MultiGPS: output file with extension "%s" not generated.' % ext)
|
|
|
89
|
|
|
90
|
|
|
91 def get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout, include_stdout=False):
|
|
|
92 tmp_stderr.close()
|
|
|
93 # Get stderr, allowing for case where it's very large.
|
|
|
94 tmp_stderr = open(tmp_err, 'rb')
|
|
|
95 stderr_str = ''
|
|
|
96 buffsize = BUFF_SIZE
|
|
|
97 try:
|
|
|
98 while True:
|
|
|
99 stderr_str += tmp_stderr.read(buffsize)
|
|
|
100 if not stderr_str or len(stderr_str) % buffsize != 0:
|
|
|
101 break
|
|
|
102 except OverflowError:
|
|
|
103 pass
|
|
|
104 tmp_stderr.close()
|
|
|
105 if include_stdout:
|
|
|
106 tmp_stdout = open(tmp_out, 'rb')
|
|
|
107 stdout_str = ''
|
|
|
108 buffsize = BUFF_SIZE
|
|
|
109 try:
|
|
|
110 while True:
|
|
|
111 stdout_str += tmp_stdout.read(buffsize)
|
|
|
112 if not stdout_str or len(stdout_str) % buffsize != 0:
|
|
|
113 break
|
|
|
114 except OverflowError:
|
|
|
115 pass
|
|
|
116 tmp_stdout.close()
|
|
|
117 if include_stdout:
|
|
|
118 return 'STDOUT\n%s\n\nSTDERR\n%s\n' % (stdout_str, stderr_str)
|
|
|
119 return stderr_str
|
|
|
120
|
|
|
121
|
|
|
122 def stop_err(msg):
|
|
|
123 sys.stderr.write(msg)
|
|
|
124 sys.exit(1)
|
|
|
125
|
|
|
126
|
|
|
127 # Preparation.
|
|
|
128 tmp_dir = tempfile.mkdtemp(prefix="tmp-multigps-")
|
|
|
129 os.makedirs(args.output_html_files_path)
|
|
|
130 # Build the command line.
|
|
|
131 cmd = 'multigps'
|
|
|
132 # General options
|
|
|
133 cmd += ' --threads %s' % args.threads
|
|
|
134 if args.eventsaretxt is not None:
|
|
|
135 # Append .txt extensions to events hrefs
|
|
|
136 # in output dataset so files will render
|
|
|
137 # in the browser.
|
|
|
138 cmd += ' --eventsaretxt'
|
|
|
139 if args.meme1proc is not None:
|
|
|
140 # Do not run the parallel version of meme.
|
|
|
141 cmd += ' --meme1proc'
|
|
|
142 # Experiment.
|
|
|
143 if args.input_items is not None:
|
|
|
144 generate_design_file(args.input_items)
|
|
|
145 cmd += ' --design %s' % DESIGN_FILE
|
|
|
146 else:
|
|
|
147 if args.expt is not None:
|
|
|
148 cmd += ' --expt %s' % args.expt
|
|
|
149 if args.format is not None:
|
|
|
150 cmd += ' --format %s' % args.format
|
|
|
151 if args.control is not None:
|
|
|
152 cmd += ' --ctrl %s' % args.control
|
|
|
153 cmd += ' --geninfo %s' % args.chrom_len_file
|
|
|
154 # Advanced options.
|
|
|
155 if args.seq is not None:
|
|
|
156 cmd += ' --seq %s' % args.seq
|
|
|
157 # Limits on how many reads
|
|
|
158 if args.fixedpb is not None:
|
|
|
159 cmd += ' --fixedpb %d' % args.fixedpb
|
|
|
160 if args.poissongausspb is not None:
|
|
|
161 cmd += ' --poissongausspb %d' % args.poissongausspb
|
|
|
162 if args.nonunique is not None:
|
|
|
163 cmd += ' --nonunique'
|
|
|
164 if args.mappability is not None:
|
|
|
165 cmd += ' --mappability %4f' % args.mappability
|
|
|
166 if args.nocache is not None:
|
|
|
167 cmd += ' --nocache'
|
|
|
168 # Scaling data.'
|
|
|
169 if args.noscaling is not None:
|
|
|
170 cmd += ' --noscaling %s' % args.noscaling
|
|
|
171 if args.medianscale is not None:
|
|
|
172 cmd += ' --medianscale %s' % args.medianscale
|
|
|
173 if args.regressionscale is not None:
|
|
|
174 cmd += ' --regressionscale %s' % args.regressionscale
|
|
|
175 if args.sesscale is not None:
|
|
|
176 cmd += ' --sesscale %s' % args.sesscale
|
|
|
177 if args.fixedscaling is not None:
|
|
|
178 cmd += ' --fixedscaling %4f' % args.fixedscaling
|
|
|
179 if args.scalewin is not None:
|
|
|
180 cmd += ' --scalewin %d' % args.scalewin
|
|
|
181 if args.plotscaling is not None:
|
|
|
182 cmd += ' --plotscaling %s' % args.plotscaling
|
|
|
183 # Running MultiGPS.
|
|
|
184 if args.readdistributionfile is not None:
|
|
|
185 cmd += ' --d %s' % args.readdistributionfile
|
|
|
186 if args.maxtrainingrounds is not None:
|
|
|
187 cmd += ' --r %s' % args.maxtrainingrounds
|
|
|
188 if args.nomodelupdate is not None:
|
|
|
189 cmd += ' --nomodelupdate'
|
|
|
190 if args.minmodelupdateevents is not None:
|
|
|
191 cmd += ' --minmodelupdateevents %d' % args.minmodelupdateevents
|
|
|
192 if args.nomodelsmoothing is not None:
|
|
|
193 cmd += ' --nomodelsmoothing'
|
|
|
194 if args.splinesmoothparam is not None:
|
|
|
195 cmd += ' --splinesmoothparam %d' % args.splinesmoothparam
|
|
|
196 if args.gaussmodelsmoothing is not None:
|
|
|
197 cmd += ' --gaussmodelsmoothing'
|
|
|
198 if args.gausssmoothparam is not None:
|
|
|
199 cmd += ' --gausssmoothparam %s' % args.gausssmoothparam
|
|
|
200 if args.jointinmodel is not None:
|
|
|
201 cmd += ' --jointinmodel'
|
|
|
202 if args.fixedmodelrange is not None:
|
|
|
203 cmd += ' --fixedmodelrange'
|
|
|
204 if args.prlogconf is not None:
|
|
|
205 cmd += ' --prlogconf %d' % args.prlogconf
|
|
|
206 if args.fixedalpha is not None:
|
|
|
207 cmd += ' --fixedalpha %d' % args.fixedalpha
|
|
|
208 if args.alphascale is not None:
|
|
|
209 cmd += ' --alphascale %4f' % args.alphascale
|
|
|
210 if args.mlconfignotshared is not None:
|
|
|
211 cmd += ' --mlconfignotshared'
|
|
|
212 if args.exclude not in [None, 'None']:
|
|
|
213 cmd += ' --exclude %s' % args.exclude_file
|
|
|
214 # MultiGPS priors
|
|
|
215 if args.noposprior is not None:
|
|
|
216 cmd += ' --noposprior'
|
|
|
217 if args.probshared is not None:
|
|
|
218 cmd += ' --probshared %4f' % args.probshared
|
|
|
219 if args.memenmotifs is not None:
|
|
|
220 cmd += ' --memenmotifs %d' % args.memenmotifs
|
|
|
221 if args.mememinw is not None:
|
|
|
222 cmd += ' --mememinw %d' % args.mememinw
|
|
|
223 if args.mememaxw is not None:
|
|
|
224 cmd += ' --mememaxw %d' % args.mememaxw
|
|
|
225 if args.nomotifs is not None:
|
|
|
226 cmd += ' --nomotifs'
|
|
|
227 if args.nomotifprior is not None:
|
|
|
228 cmd += ' --nomotifprior'
|
|
|
229 # Reporting binding events
|
|
|
230 if args.minqvalue is not None:
|
|
|
231 cmd += ' --q %4f' % args.minqvalue
|
|
|
232 if args.minfold is not None:
|
|
|
233 cmd += ' --minfold %4f' % args.minfold
|
|
|
234 if args.nodifftests is not None:
|
|
|
235 cmd += ' --nodifftests'
|
|
|
236 if args.edgerod is not None:
|
|
|
237 cmd += ' --edgerod %4f' % args.edgerod
|
|
|
238 if args.diffp is not None:
|
|
|
239 cmd += ' --diffp %4f' % args.diffp
|
|
|
240 # Output directory.
|
|
|
241 cmd += ' --out %s' % args.output_html_files_path
|
|
|
242 # Define command response buffers.
|
|
|
243 tmp_out = tempfile.NamedTemporaryFile(dir=tmp_dir).name
|
|
|
244 tmp_stdout = open(tmp_out, 'wb')
|
|
|
245 tmp_err = tempfile.NamedTemporaryFile(dir=tmp_dir).name
|
|
|
246 tmp_stderr = open(tmp_err, 'wb')
|
|
|
247 tmp_filename = tempfile.NamedTemporaryFile(dir=tmp_dir).name
|
|
|
248 # Execute the command.
|
|
|
249 proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, stdout=tmp_stdout, shell=True)
|
|
|
250 rc = proc.wait()
|
|
|
251 if rc != 0:
|
|
|
252 error_message = get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout)
|
|
|
253 stop_err(error_message)
|
|
|
254 # Move each output file to the approapriate output dataset path.
|
|
|
255 output_bed = get_file_with_extension(args.output_html_files_path, 'bed')
|
|
|
256 shutil.move(os.path.join(args.output_html_files_path, output_bed), args.output_bed)
|
|
|
257 output_html = get_file_with_extension(args.output_html_files_path, 'html')
|
|
|
258 shutil.move(os.path.join(args.output_html_files_path, output_html), args.output_html)
|
|
|
259 replicates_counts = get_file_with_extension(args.output_html_files_path, 'counts')
|
|
|
260 shutil.move(os.path.join(args.output_html_files_path, replicates_counts), args.replicates_counts)
|
|
|
261 all_events_table = get_file_with_extension(args.output_html_files_path, 'table.txt')
|
|
|
262 shutil.move(os.path.join(args.output_html_files_path, all_events_table), args.all_events_table)
|
|
|
263 # Clean up.
|
|
|
264 if os.path.exists(tmp_dir):
|
|
|
265 shutil.rmtree(tmp_dir)
|