comparison multigps.py @ 12:77a7c882dd36 draft

Uploaded
author greg
date Tue, 05 Jan 2016 08:21:30 -0500
parents a75e532f0441
children 2a9447ce6bd5
comparison
equal deleted inserted replaced
11:a75e532f0441 12:77a7c882dd36
1 import argparse 1 import optparse
2 import os 2 import os
3 import shutil 3 import shutil
4 import subprocess 4 import subprocess
5 import sys 5 import sys
6 import tempfile 6 import tempfile
55 55
56 def stop_err(msg): 56 def stop_err(msg):
57 sys.stderr.write(msg) 57 sys.stderr.write(msg)
58 sys.exit(1) 58 sys.exit(1)
59 59
60 parser = argparse.ArgumentParser() 60 parser = optparse.OptionParser()
61 parser.add_argument('--input_item', dest='input_items', action='append', nargs=7, help="Input datasets and associated parameters") 61 parser.add_option('--input_item', dest='input_items', action='append', nargs=7, help="Input datasets and associated parameters")
62 parser.add_argument('--threads', dest='threads', type=int, default=4, help='The number of threads to run') 62 parser.add_option('--threads', dest='threads', type="int", default=4, help='The number of threads to run')
63 parser.add_argument('--multigps_jar', dest='multigps_jar', help='Path to the MultiGPS jar file') 63 parser.add_option('--multigps_jar', dest='multigps_jar', help='Path to the MultiGPS jar file')
64 parser.add_argument('--geninfo', dest='geninfo', help='File listing the lengths of all chromosomes') 64 parser.add_option('--geninfo', dest='geninfo', help='File listing the lengths of all chromosomes')
65 parser.add_argument('--use_motif', dest='use_motif', help='Perform motif-finding or use a motif-prior') 65 parser.add_option('--use_motif', dest='use_motif', help='Perform motif-finding or use a motif-prior')
66 parser.add_argument('--seq_dir', dest='seq_dir', default=None, help='Directory containing fasta files corresponding to every named chromosome') 66 parser.add_option('--seq_dir', dest='seq_dir', default=None, help='Directory containing fasta files corresponding to every named chromosome')
67 parser.add_argument('--positional_prior', dest='positional_prior', default=None, help='Perform inter-experiment positional prior') 67 parser.add_option('--positional_prior', dest='positional_prior', default=None, help='Perform inter-experiment positional prior')
68 parser.add_argument('--events_shared_probability', dest='events_shared_probability', type=float, default=0.9, help='Probability that events are shared across conditions') 68 parser.add_option('--events_shared_probability', dest='events_shared_probability', type=float, default=0.9, help='Probability that events are shared across conditions')
69 parser.add_argument('--motifs', dest='motifs', default=None, help='Perform motif-finding and motif priors') 69 parser.add_option('--motifs', dest='motifs', default=None, help='Perform motif-finding and motif priors')
70 parser.add_argument('--motif_finding_only', dest='motif_finding_only', default=None, help='Perform motif-finding only') 70 parser.add_option('--motif_finding_only', dest='motif_finding_only', default=None, help='Perform motif-finding only')
71 parser.add_argument('--num_motifs', dest='num_motifs', type=int, default=None, help='Number of motifs MEME should find for each condition') 71 parser.add_option('--num_motifs', dest='num_motifs', type="int", default=None, help='Number of motifs MEME should find for each condition')
72 parser.add_argument('--mememinw', dest='mememinw', type=int, default=None, help='Minimum motif width for MEME') 72 parser.add_option('--mememinw', dest='mememinw', type="int", default=None, help='Minimum motif width for MEME')
73 parser.add_argument('--mememaxw', dest='mememaxw', type=int, default=None, help='Maximum motif width for MEME') 73 parser.add_option('--mememaxw', dest='mememaxw', type="int", default=None, help='Maximum motif width for MEME')
74 parser.add_argument('--fixedpb', dest='fixedpb', type=int, default=None, help='Fixed per-base limit') 74 parser.add_option('--fixedpb', dest='fixedpb', type="int", default=None, help='Fixed per-base limit')
75 parser.add_argument('--poissongausspb', dest='poissongausspb', type=int, default=None, help='Poisson threshold for filtering per base') 75 parser.add_option('--poissongausspb', dest='poissongausspb', type="int", default=None, help='Poisson threshold for filtering per base')
76 parser.add_argument('--non_unique_reads', dest='non_unique_reads', default=None, help='Use non-unique reads') 76 parser.add_option('--non_unique_reads', dest='non_unique_reads', default=None, help='Use non-unique reads')
77 parser.add_argument('--minqvalue', dest='minqvalue', type=int, default=None, help='Minimum Q-value (corrected p-value) of reported binding events') 77 parser.add_option('--minqvalue', dest='minqvalue', type="int", default=None, help='Minimum Q-value (corrected p-value) of reported binding events')
78 parser.add_argument('--minfold', dest='minfold', type=int, default=None, help='Minimum event fold-change vs scaled control') 78 parser.add_option('--minfold', dest='minfold', type="int", default=None, help='Minimum event fold-change vs scaled control')
79 parser.add_argument('--diff_enrichment_tests', dest='diff_enrichment_tests', help='Run differential enrichment tests') 79 parser.add_option('--diff_enrichment_tests', dest='diff_enrichment_tests', help='Run differential enrichment tests')
80 parser.add_argument('--edgerod', dest='edgerod', type=int, default=None, help='EdgeR over-dispersion parameter value') 80 parser.add_option('--edgerod', dest='edgerod', type="int", default=None, help='EdgeR over-dispersion parameter value')
81 parser.add_argument('--diffp', dest='diffp', type=int, default=None, help='Minimum p-value for reporting differential enrichment') 81 parser.add_option('--diffp', dest='diffp', type="int", default=None, help='Minimum p-value for reporting differential enrichment')
82 parser.add_argument('--noscaling', dest='noscaling', default=None, help='Do not use signal vs control scaling') 82 parser.add_option('--noscaling', dest='noscaling', default=None, help='Do not use signal vs control scaling')
83 parser.add_argument('--medianscale', dest='medianscale', default=None, help='Use the median signal/control ratio as the scaling factor') 83 parser.add_option('--medianscale', dest='medianscale', default=None, help='Use the median signal/control ratio as the scaling factor')
84 parser.add_argument('--sesscale', dest='sesscale', default=None, help='Estimate scaling factor by SES') 84 parser.add_option('--sesscale', dest='sesscale', default=None, help='Estimate scaling factor by SES')
85 parser.add_argument('--scalewin', dest='scalewin', type=int, default=None, help='Window size for estimating scaling ratios') 85 parser.add_option('--scalewin', dest='scalewin', type="int", default=None, help='Window size for estimating scaling ratios')
86 parser.add_argument('--max_training_rounds', dest='max_training_rounds', type=int, default=None, help='Maximum number of training rounds for updating binding event read distributions') 86 parser.add_option('--max_training_rounds', dest='max_training_rounds', type="int", default=None, help='Maximum number of training rounds for updating binding event read distributions')
87 parser.add_argument('--exclude_file', dest='exclude_file', default=None, help='File containing a set of regions to ignore during MultiGPS training') 87 parser.add_option('--exclude_file', dest='exclude_file', default=None, help='File containing a set of regions to ignore during MultiGPS training')
88 parser.add_argument('--binding_model_updates', dest='binding_model_updates', default=None, help='Perform binding model updates') 88 parser.add_option('--binding_model_updates', dest='binding_model_updates', default=None, help='Perform binding model updates')
89 parser.add_argument('--minmodelupdateevents', dest='minmodelupdateevents', type=int, default=None, help='Minimum number of events to support an update of the read distribution') 89 parser.add_option('--minmodelupdateevents', dest='minmodelupdateevents', type="int", default=None, help='Minimum number of events to support an update of the read distribution')
90 parser.add_argument('--binding_model_smoothing', dest='binding_model_smoothing', default=None, help='Binding model smoothing') 90 parser.add_option('--binding_model_smoothing', dest='binding_model_smoothing', default=None, help='Binding model smoothing')
91 parser.add_argument('--spline_smooth', dest='spline_smooth', type=int, default=None, help='Binding model smoothing value') 91 parser.add_option('--spline_smooth', dest='spline_smooth', type="int", default=None, help='Binding model smoothing value')
92 parser.add_argument('--gauss_smooth', dest='gauss_smooth', type=int, default=None, help='Gaussian smoothing std dev') 92 parser.add_option('--gauss_smooth', dest='gauss_smooth', type="int", default=None, help='Gaussian smoothing std dev')
93 parser.add_argument('--joint_in_model', dest='joint_in_model', default=None, help='Allow joint events in model updates') 93 parser.add_option('--joint_in_model', dest='joint_in_model', default=None, help='Allow joint events in model updates')
94 parser.add_argument('--ml_config_not_shared', dest='ml_config_not_shared', default=None, help='Share component configs in the ML step') 94 parser.add_option('--ml_config_not_shared', dest='ml_config_not_shared', default=None, help='Share component configs in the ML step')
95 parser.add_argument('--output_html_path', dest='output_html_path', help='Output html results file') 95 parser.add_option('--output_html_path', dest='output_html_path', help='Output html results file')
96 parser.add_argument('--output_html_files_path', dest='output_html_files_path', help='Output html extra files') 96 parser.add_option('--output_html_files_path', dest='output_html_files_path', help='Output html extra files')
97 parser.add_argument('--output_process_path', dest='output_process_path', default=None, help='Output file for capturing stdout') 97 parser.add_option('--output_process_path', dest='output_process_path', default=None, help='Output file for capturing stdout')
98 args = parser.parse_args() 98 options, args = parser.parse_args()
99 99
100 try: 100 try:
101 # Preparation. 101 # Preparation.
102 tmp_dir = tempfile.mkdtemp(prefix="tmp-multigps-") 102 tmp_dir = tempfile.mkdtemp(prefix="tmp-multigps-")
103 generate_design_file(args.input_items, DESIGN_FILE) 103 generate_design_file(options.input_items, DESIGN_FILE)
104 # Build the command line. 104 # Build the command line.
105 cmd_list = ['java -jar %s --verbose' % args.multigps_jar] 105 cmd_list = ['java -jar %s --verbose' % options.multigps_jar]
106 cmd_list.append('--threads %d' % args.threads) 106 cmd_list.append('--threads %d' % options.threads)
107 cmd_list.append('--geninfo %s' % args.geninfo) 107 cmd_list.append('--geninfo %s' % options.geninfo)
108 if args.use_motif == 'yes': 108 if options.use_motif == 'yes':
109 cmd_list.append('--seq %s' % args.seq_dir) 109 cmd_list.append('--seq %s' % options.seq_dir)
110 if args.positional_prior == 'no': 110 if options.positional_prior == 'no':
111 cmd_list.append('--noposprior') 111 cmd_list.append('--noposprior')
112 cmd_list.append('--probshared %3f' % args.events_shared_probability) 112 cmd_list.append('--probshared %3f' % options.events_shared_probability)
113 if args.motifs == 'no': 113 if options.motifs == 'no':
114 cmd_list.append('--nomotifs') 114 cmd_list.append('--nomotifs')
115 if args.motif_finding_only == 'yes': 115 if options.motif_finding_only == 'yes':
116 cmd_list.append('--nomotifprior') 116 cmd_list.append('--nomotifprior')
117 if args.num_motifs is not None: 117 if options.num_motifs is not None:
118 cmd_list.append('--memenmotifs %d' % args.num_motifs) 118 cmd_list.append('--memenmotifs %d' % options.num_motifs)
119 if args.mememinw is not None: 119 if options.mememinw is not None:
120 cmd_list.append('--mememinw %d' % args.mememinw) 120 cmd_list.append('--mememinw %d' % options.mememinw)
121 if args.mememaxw is not None: 121 if options.mememaxw is not None:
122 cmd_list.append('--mememaxw %d' % args.mememaxw) 122 cmd_list.append('--mememaxw %d' % options.mememaxw)
123 if args.fixedpb is not None: 123 if options.fixedpb is not None:
124 cmd_list.append('--fixedpb %d' % args.fixedpb) 124 cmd_list.append('--fixedpb %d' % options.fixedpb)
125 if args.poissongausspb is not None: 125 if options.poissongausspb is not None:
126 cmd_list.append('--poissongausspb %d' % args.poissongausspb) 126 cmd_list.append('--poissongausspb %d' % options.poissongausspb)
127 if args.non_unique_reads == 'yes': 127 if options.non_unique_reads == 'yes':
128 cmd_list.append('--nonunique') 128 cmd_list.append('--nonunique')
129 if args.minqvalue is not None: 129 if options.minqvalue is not None:
130 cmd_list.append('--q %d' % args.minqvalue) 130 cmd_list.append('--q %d' % options.minqvalue)
131 if args.minfold is not None: 131 if options.minfold is not None:
132 cmd_list.append('--minfold %d' % args.minfold) 132 cmd_list.append('--minfold %d' % options.minfold)
133 if args.diff_enrichment_tests == 'no': 133 if options.diff_enrichment_tests == 'no':
134 cmd_list.append('--nodifftests') 134 cmd_list.append('--nodifftests')
135 if args.edgerod is not None: 135 if options.edgerod is not None:
136 cmd_list.append('--edgerod %d' % args.edgerod) 136 cmd_list.append('--edgerod %d' % options.edgerod)
137 if args.diffp is not None: 137 if options.diffp is not None:
138 cmd_list.append('--diffp %d' % args.diffp) 138 cmd_list.append('--diffp %d' % options.diffp)
139 if args.noscaling == 'no': 139 if options.noscaling == 'no':
140 cmd_list.append('--noscaling') 140 cmd_list.append('--noscaling')
141 if args.medianscale == "yes": 141 if options.medianscale == "yes":
142 cmd_list.append('--medianscale') 142 cmd_list.append('--medianscale')
143 if args.sesscale == "yes": 143 if options.sesscale == "yes":
144 cmd_list.append('--sesscale') 144 cmd_list.append('--sesscale')
145 if args.scalewin is not None: 145 if options.scalewin is not None:
146 cmd_list.append('--scalewin %d' % args.scalewin) 146 cmd_list.append('--scalewin %d' % options.scalewin)
147 if args.max_training_rounds is not None: 147 if options.max_training_rounds is not None:
148 cmd_list.append('--r %d' % args.max_training_rounds) 148 cmd_list.append('--r %d' % options.max_training_rounds)
149 if args.exclude_file not in [None, 'None']: 149 if options.exclude_file not in [None, 'None']:
150 cmd_list.append('--exclude %s' % args.exclude_file) 150 cmd_list.append('--exclude %s' % options.exclude_file)
151 if args.binding_model_updates == 'no': 151 if options.binding_model_updates == 'no':
152 cmd_list.append('--nomodelupdate') 152 cmd_list.append('--nomodelupdate')
153 if args.minmodelupdateevents is not None: 153 if options.minmodelupdateevents is not None:
154 cmd_list.append('--minmodelupdateevents %d' % args.minmodelupdateevents) 154 cmd_list.append('--minmodelupdateevents %d' % options.minmodelupdateevents)
155 if args.binding_model_smoothing == 'yes': 155 if options.binding_model_smoothing == 'yes':
156 if args.spline_smooth is not None: 156 if options.spline_smooth is not None:
157 cmd_list.append('--splinesmoothparam %d' % args.spline_smooth) 157 cmd_list.append('--splinesmoothparam %d' % options.spline_smooth)
158 else: 158 else:
159 cmd_list.append('--nomodelsmoothing') 159 cmd_list.append('--nomodelsmoothing')
160 if args.gauss_smooth is not None: 160 if options.gauss_smooth is not None:
161 cmd_list.append('--gaussmodelsmoothing') 161 cmd_list.append('--gaussmodelsmoothing')
162 cmd_list.append('--gausssmoothparam %d' % args.gauss_smooth) 162 cmd_list.append('--gausssmoothparam %d' % options.gauss_smooth)
163 if args.joint_in_model == 'yes': 163 if options.joint_in_model == 'yes':
164 cmd_list.append('--jointinmodel') 164 cmd_list.append('--jointinmodel')
165 if args.ml_config_not_shared == 'no': 165 if options.ml_config_not_shared == 'no':
166 cmd_list.append('--mlconfignotshared') 166 cmd_list.append('--mlconfignotshared')
167 cmd_list.append('--design %s' % DESIGN_FILE) 167 cmd_list.append('--design %s' % DESIGN_FILE)
168 cmd_list.append('--out %s' % args.output_html_files_path) 168 cmd_list.append('--out %s' % options.output_html_files_path)
169 if args.output_process_path is None: 169 if options.output_process_path is None:
170 cmd_list.append('2>/dev/null') 170 cmd_list.append('2>/dev/null')
171 else: 171 else:
172 cmd_list.append('2>%s' % args.output_process_path) 172 cmd_list.append('2>%s' % options.output_process_path)
173 cmd = ' '.join(cmd_list) 173 cmd = ' '.join(cmd_list)
174 # Define command response buffers. 174 # Define command response buffers.
175 tmp_out = tempfile.NamedTemporaryFile(dir=tmp_dir).name 175 tmp_out = tempfile.NamedTemporaryFile(dir=tmp_dir).name
176 tmp_stdout = open(tmp_out, 'wb') 176 tmp_stdout = open(tmp_out, 'wb')
177 tmp_err = tempfile.NamedTemporaryFile(dir=tmp_dir).name 177 tmp_err = tempfile.NamedTemporaryFile(dir=tmp_dir).name
182 rc = proc.wait() 182 rc = proc.wait()
183 if rc != 0: 183 if rc != 0:
184 error_message = get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout) 184 error_message = get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout)
185 stop_err(error_message) 185 stop_err(error_message)
186 # Move the output HTML file to the output dataset path. 186 # Move the output HTML file to the output dataset path.
187 output_html_filename = 'multiGPS_%s_results.html' % os.path.split(args.output_html_files_path)[1] 187 output_html_filename = 'multiGPS_%s_results.html' % os.path.split(options.output_html_files_path)[1]
188 shutil.move(os.path.join(args.output_html_files_path, output_html_filename), args.output_html_path) 188 shutil.move(os.path.join(options.output_html_files_path, output_html_filename), options.output_html_path)
189 except Exception, e: 189 except Exception, e:
190 stop_err('Error running MultiGPS\n%s\n' % str(e)) 190 stop_err('Error running MultiGPS\n%s\n' % str(e))
191 finally: 191 finally:
192 if os.path.exists(tmp_dir): 192 if os.path.exists(tmp_dir):
193 shutil.rmtree(tmp_dir) 193 shutil.rmtree(tmp_dir)