Mercurial > repos > greg > multigps
comparison multigps.py @ 12:77a7c882dd36 draft
Uploaded
author | greg |
---|---|
date | Tue, 05 Jan 2016 08:21:30 -0500 |
parents | a75e532f0441 |
children | 2a9447ce6bd5 |
comparison
equal
deleted
inserted
replaced
11:a75e532f0441 | 12:77a7c882dd36 |
---|---|
1 import argparse | 1 import optparse |
2 import os | 2 import os |
3 import shutil | 3 import shutil |
4 import subprocess | 4 import subprocess |
5 import sys | 5 import sys |
6 import tempfile | 6 import tempfile |
55 | 55 |
56 def stop_err(msg): | 56 def stop_err(msg): |
57 sys.stderr.write(msg) | 57 sys.stderr.write(msg) |
58 sys.exit(1) | 58 sys.exit(1) |
59 | 59 |
60 parser = argparse.ArgumentParser() | 60 parser = optparse.OptionParser() |
61 parser.add_argument('--input_item', dest='input_items', action='append', nargs=7, help="Input datasets and associated parameters") | 61 parser.add_option('--input_item', dest='input_items', action='append', nargs=7, help="Input datasets and associated parameters") |
62 parser.add_argument('--threads', dest='threads', type=int, default=4, help='The number of threads to run') | 62 parser.add_option('--threads', dest='threads', type="int", default=4, help='The number of threads to run') |
63 parser.add_argument('--multigps_jar', dest='multigps_jar', help='Path to the MultiGPS jar file') | 63 parser.add_option('--multigps_jar', dest='multigps_jar', help='Path to the MultiGPS jar file') |
64 parser.add_argument('--geninfo', dest='geninfo', help='File listing the lengths of all chromosomes') | 64 parser.add_option('--geninfo', dest='geninfo', help='File listing the lengths of all chromosomes') |
65 parser.add_argument('--use_motif', dest='use_motif', help='Perform motif-finding or use a motif-prior') | 65 parser.add_option('--use_motif', dest='use_motif', help='Perform motif-finding or use a motif-prior') |
66 parser.add_argument('--seq_dir', dest='seq_dir', default=None, help='Directory containing fasta files corresponding to every named chromosome') | 66 parser.add_option('--seq_dir', dest='seq_dir', default=None, help='Directory containing fasta files corresponding to every named chromosome') |
67 parser.add_argument('--positional_prior', dest='positional_prior', default=None, help='Perform inter-experiment positional prior') | 67 parser.add_option('--positional_prior', dest='positional_prior', default=None, help='Perform inter-experiment positional prior') |
68 parser.add_argument('--events_shared_probability', dest='events_shared_probability', type=float, default=0.9, help='Probability that events are shared across conditions') | 68 parser.add_option('--events_shared_probability', dest='events_shared_probability', type=float, default=0.9, help='Probability that events are shared across conditions') |
69 parser.add_argument('--motifs', dest='motifs', default=None, help='Perform motif-finding and motif priors') | 69 parser.add_option('--motifs', dest='motifs', default=None, help='Perform motif-finding and motif priors') |
70 parser.add_argument('--motif_finding_only', dest='motif_finding_only', default=None, help='Perform motif-finding only') | 70 parser.add_option('--motif_finding_only', dest='motif_finding_only', default=None, help='Perform motif-finding only') |
71 parser.add_argument('--num_motifs', dest='num_motifs', type=int, default=None, help='Number of motifs MEME should find for each condition') | 71 parser.add_option('--num_motifs', dest='num_motifs', type="int", default=None, help='Number of motifs MEME should find for each condition') |
72 parser.add_argument('--mememinw', dest='mememinw', type=int, default=None, help='Minimum motif width for MEME') | 72 parser.add_option('--mememinw', dest='mememinw', type="int", default=None, help='Minimum motif width for MEME') |
73 parser.add_argument('--mememaxw', dest='mememaxw', type=int, default=None, help='Maximum motif width for MEME') | 73 parser.add_option('--mememaxw', dest='mememaxw', type="int", default=None, help='Maximum motif width for MEME') |
74 parser.add_argument('--fixedpb', dest='fixedpb', type=int, default=None, help='Fixed per-base limit') | 74 parser.add_option('--fixedpb', dest='fixedpb', type="int", default=None, help='Fixed per-base limit') |
75 parser.add_argument('--poissongausspb', dest='poissongausspb', type=int, default=None, help='Poisson threshold for filtering per base') | 75 parser.add_option('--poissongausspb', dest='poissongausspb', type="int", default=None, help='Poisson threshold for filtering per base') |
76 parser.add_argument('--non_unique_reads', dest='non_unique_reads', default=None, help='Use non-unique reads') | 76 parser.add_option('--non_unique_reads', dest='non_unique_reads', default=None, help='Use non-unique reads') |
77 parser.add_argument('--minqvalue', dest='minqvalue', type=int, default=None, help='Minimum Q-value (corrected p-value) of reported binding events') | 77 parser.add_option('--minqvalue', dest='minqvalue', type="int", default=None, help='Minimum Q-value (corrected p-value) of reported binding events') |
78 parser.add_argument('--minfold', dest='minfold', type=int, default=None, help='Minimum event fold-change vs scaled control') | 78 parser.add_option('--minfold', dest='minfold', type="int", default=None, help='Minimum event fold-change vs scaled control') |
79 parser.add_argument('--diff_enrichment_tests', dest='diff_enrichment_tests', help='Run differential enrichment tests') | 79 parser.add_option('--diff_enrichment_tests', dest='diff_enrichment_tests', help='Run differential enrichment tests') |
80 parser.add_argument('--edgerod', dest='edgerod', type=int, default=None, help='EdgeR over-dispersion parameter value') | 80 parser.add_option('--edgerod', dest='edgerod', type="int", default=None, help='EdgeR over-dispersion parameter value') |
81 parser.add_argument('--diffp', dest='diffp', type=int, default=None, help='Minimum p-value for reporting differential enrichment') | 81 parser.add_option('--diffp', dest='diffp', type="int", default=None, help='Minimum p-value for reporting differential enrichment') |
82 parser.add_argument('--noscaling', dest='noscaling', default=None, help='Do not use signal vs control scaling') | 82 parser.add_option('--noscaling', dest='noscaling', default=None, help='Do not use signal vs control scaling') |
83 parser.add_argument('--medianscale', dest='medianscale', default=None, help='Use the median signal/control ratio as the scaling factor') | 83 parser.add_option('--medianscale', dest='medianscale', default=None, help='Use the median signal/control ratio as the scaling factor') |
84 parser.add_argument('--sesscale', dest='sesscale', default=None, help='Estimate scaling factor by SES') | 84 parser.add_option('--sesscale', dest='sesscale', default=None, help='Estimate scaling factor by SES') |
85 parser.add_argument('--scalewin', dest='scalewin', type=int, default=None, help='Window size for estimating scaling ratios') | 85 parser.add_option('--scalewin', dest='scalewin', type="int", default=None, help='Window size for estimating scaling ratios') |
86 parser.add_argument('--max_training_rounds', dest='max_training_rounds', type=int, default=None, help='Maximum number of training rounds for updating binding event read distributions') | 86 parser.add_option('--max_training_rounds', dest='max_training_rounds', type="int", default=None, help='Maximum number of training rounds for updating binding event read distributions') |
87 parser.add_argument('--exclude_file', dest='exclude_file', default=None, help='File containing a set of regions to ignore during MultiGPS training') | 87 parser.add_option('--exclude_file', dest='exclude_file', default=None, help='File containing a set of regions to ignore during MultiGPS training') |
88 parser.add_argument('--binding_model_updates', dest='binding_model_updates', default=None, help='Perform binding model updates') | 88 parser.add_option('--binding_model_updates', dest='binding_model_updates', default=None, help='Perform binding model updates') |
89 parser.add_argument('--minmodelupdateevents', dest='minmodelupdateevents', type=int, default=None, help='Minimum number of events to support an update of the read distribution') | 89 parser.add_option('--minmodelupdateevents', dest='minmodelupdateevents', type="int", default=None, help='Minimum number of events to support an update of the read distribution') |
90 parser.add_argument('--binding_model_smoothing', dest='binding_model_smoothing', default=None, help='Binding model smoothing') | 90 parser.add_option('--binding_model_smoothing', dest='binding_model_smoothing', default=None, help='Binding model smoothing') |
91 parser.add_argument('--spline_smooth', dest='spline_smooth', type=int, default=None, help='Binding model smoothing value') | 91 parser.add_option('--spline_smooth', dest='spline_smooth', type="int", default=None, help='Binding model smoothing value') |
92 parser.add_argument('--gauss_smooth', dest='gauss_smooth', type=int, default=None, help='Gaussian smoothing std dev') | 92 parser.add_option('--gauss_smooth', dest='gauss_smooth', type="int", default=None, help='Gaussian smoothing std dev') |
93 parser.add_argument('--joint_in_model', dest='joint_in_model', default=None, help='Allow joint events in model updates') | 93 parser.add_option('--joint_in_model', dest='joint_in_model', default=None, help='Allow joint events in model updates') |
94 parser.add_argument('--ml_config_not_shared', dest='ml_config_not_shared', default=None, help='Share component configs in the ML step') | 94 parser.add_option('--ml_config_not_shared', dest='ml_config_not_shared', default=None, help='Share component configs in the ML step') |
95 parser.add_argument('--output_html_path', dest='output_html_path', help='Output html results file') | 95 parser.add_option('--output_html_path', dest='output_html_path', help='Output html results file') |
96 parser.add_argument('--output_html_files_path', dest='output_html_files_path', help='Output html extra files') | 96 parser.add_option('--output_html_files_path', dest='output_html_files_path', help='Output html extra files') |
97 parser.add_argument('--output_process_path', dest='output_process_path', default=None, help='Output file for capturing stdout') | 97 parser.add_option('--output_process_path', dest='output_process_path', default=None, help='Output file for capturing stdout') |
98 args = parser.parse_args() | 98 options, args = parser.parse_args() |
99 | 99 |
100 try: | 100 try: |
101 # Preparation. | 101 # Preparation. |
102 tmp_dir = tempfile.mkdtemp(prefix="tmp-multigps-") | 102 tmp_dir = tempfile.mkdtemp(prefix="tmp-multigps-") |
103 generate_design_file(args.input_items, DESIGN_FILE) | 103 generate_design_file(options.input_items, DESIGN_FILE) |
104 # Build the command line. | 104 # Build the command line. |
105 cmd_list = ['java -jar %s --verbose' % args.multigps_jar] | 105 cmd_list = ['java -jar %s --verbose' % options.multigps_jar] |
106 cmd_list.append('--threads %d' % args.threads) | 106 cmd_list.append('--threads %d' % options.threads) |
107 cmd_list.append('--geninfo %s' % args.geninfo) | 107 cmd_list.append('--geninfo %s' % options.geninfo) |
108 if args.use_motif == 'yes': | 108 if options.use_motif == 'yes': |
109 cmd_list.append('--seq %s' % args.seq_dir) | 109 cmd_list.append('--seq %s' % options.seq_dir) |
110 if args.positional_prior == 'no': | 110 if options.positional_prior == 'no': |
111 cmd_list.append('--noposprior') | 111 cmd_list.append('--noposprior') |
112 cmd_list.append('--probshared %3f' % args.events_shared_probability) | 112 cmd_list.append('--probshared %3f' % options.events_shared_probability) |
113 if args.motifs == 'no': | 113 if options.motifs == 'no': |
114 cmd_list.append('--nomotifs') | 114 cmd_list.append('--nomotifs') |
115 if args.motif_finding_only == 'yes': | 115 if options.motif_finding_only == 'yes': |
116 cmd_list.append('--nomotifprior') | 116 cmd_list.append('--nomotifprior') |
117 if args.num_motifs is not None: | 117 if options.num_motifs is not None: |
118 cmd_list.append('--memenmotifs %d' % args.num_motifs) | 118 cmd_list.append('--memenmotifs %d' % options.num_motifs) |
119 if args.mememinw is not None: | 119 if options.mememinw is not None: |
120 cmd_list.append('--mememinw %d' % args.mememinw) | 120 cmd_list.append('--mememinw %d' % options.mememinw) |
121 if args.mememaxw is not None: | 121 if options.mememaxw is not None: |
122 cmd_list.append('--mememaxw %d' % args.mememaxw) | 122 cmd_list.append('--mememaxw %d' % options.mememaxw) |
123 if args.fixedpb is not None: | 123 if options.fixedpb is not None: |
124 cmd_list.append('--fixedpb %d' % args.fixedpb) | 124 cmd_list.append('--fixedpb %d' % options.fixedpb) |
125 if args.poissongausspb is not None: | 125 if options.poissongausspb is not None: |
126 cmd_list.append('--poissongausspb %d' % args.poissongausspb) | 126 cmd_list.append('--poissongausspb %d' % options.poissongausspb) |
127 if args.non_unique_reads == 'yes': | 127 if options.non_unique_reads == 'yes': |
128 cmd_list.append('--nonunique') | 128 cmd_list.append('--nonunique') |
129 if args.minqvalue is not None: | 129 if options.minqvalue is not None: |
130 cmd_list.append('--q %d' % args.minqvalue) | 130 cmd_list.append('--q %d' % options.minqvalue) |
131 if args.minfold is not None: | 131 if options.minfold is not None: |
132 cmd_list.append('--minfold %d' % args.minfold) | 132 cmd_list.append('--minfold %d' % options.minfold) |
133 if args.diff_enrichment_tests == 'no': | 133 if options.diff_enrichment_tests == 'no': |
134 cmd_list.append('--nodifftests') | 134 cmd_list.append('--nodifftests') |
135 if args.edgerod is not None: | 135 if options.edgerod is not None: |
136 cmd_list.append('--edgerod %d' % args.edgerod) | 136 cmd_list.append('--edgerod %d' % options.edgerod) |
137 if args.diffp is not None: | 137 if options.diffp is not None: |
138 cmd_list.append('--diffp %d' % args.diffp) | 138 cmd_list.append('--diffp %d' % options.diffp) |
139 if args.noscaling == 'no': | 139 if options.noscaling == 'no': |
140 cmd_list.append('--noscaling') | 140 cmd_list.append('--noscaling') |
141 if args.medianscale == "yes": | 141 if options.medianscale == "yes": |
142 cmd_list.append('--medianscale') | 142 cmd_list.append('--medianscale') |
143 if args.sesscale == "yes": | 143 if options.sesscale == "yes": |
144 cmd_list.append('--sesscale') | 144 cmd_list.append('--sesscale') |
145 if args.scalewin is not None: | 145 if options.scalewin is not None: |
146 cmd_list.append('--scalewin %d' % args.scalewin) | 146 cmd_list.append('--scalewin %d' % options.scalewin) |
147 if args.max_training_rounds is not None: | 147 if options.max_training_rounds is not None: |
148 cmd_list.append('--r %d' % args.max_training_rounds) | 148 cmd_list.append('--r %d' % options.max_training_rounds) |
149 if args.exclude_file not in [None, 'None']: | 149 if options.exclude_file not in [None, 'None']: |
150 cmd_list.append('--exclude %s' % args.exclude_file) | 150 cmd_list.append('--exclude %s' % options.exclude_file) |
151 if args.binding_model_updates == 'no': | 151 if options.binding_model_updates == 'no': |
152 cmd_list.append('--nomodelupdate') | 152 cmd_list.append('--nomodelupdate') |
153 if args.minmodelupdateevents is not None: | 153 if options.minmodelupdateevents is not None: |
154 cmd_list.append('--minmodelupdateevents %d' % args.minmodelupdateevents) | 154 cmd_list.append('--minmodelupdateevents %d' % options.minmodelupdateevents) |
155 if args.binding_model_smoothing == 'yes': | 155 if options.binding_model_smoothing == 'yes': |
156 if args.spline_smooth is not None: | 156 if options.spline_smooth is not None: |
157 cmd_list.append('--splinesmoothparam %d' % args.spline_smooth) | 157 cmd_list.append('--splinesmoothparam %d' % options.spline_smooth) |
158 else: | 158 else: |
159 cmd_list.append('--nomodelsmoothing') | 159 cmd_list.append('--nomodelsmoothing') |
160 if args.gauss_smooth is not None: | 160 if options.gauss_smooth is not None: |
161 cmd_list.append('--gaussmodelsmoothing') | 161 cmd_list.append('--gaussmodelsmoothing') |
162 cmd_list.append('--gausssmoothparam %d' % args.gauss_smooth) | 162 cmd_list.append('--gausssmoothparam %d' % options.gauss_smooth) |
163 if args.joint_in_model == 'yes': | 163 if options.joint_in_model == 'yes': |
164 cmd_list.append('--jointinmodel') | 164 cmd_list.append('--jointinmodel') |
165 if args.ml_config_not_shared == 'no': | 165 if options.ml_config_not_shared == 'no': |
166 cmd_list.append('--mlconfignotshared') | 166 cmd_list.append('--mlconfignotshared') |
167 cmd_list.append('--design %s' % DESIGN_FILE) | 167 cmd_list.append('--design %s' % DESIGN_FILE) |
168 cmd_list.append('--out %s' % args.output_html_files_path) | 168 cmd_list.append('--out %s' % options.output_html_files_path) |
169 if args.output_process_path is None: | 169 if options.output_process_path is None: |
170 cmd_list.append('2>/dev/null') | 170 cmd_list.append('2>/dev/null') |
171 else: | 171 else: |
172 cmd_list.append('2>%s' % args.output_process_path) | 172 cmd_list.append('2>%s' % options.output_process_path) |
173 cmd = ' '.join(cmd_list) | 173 cmd = ' '.join(cmd_list) |
174 # Define command response buffers. | 174 # Define command response buffers. |
175 tmp_out = tempfile.NamedTemporaryFile(dir=tmp_dir).name | 175 tmp_out = tempfile.NamedTemporaryFile(dir=tmp_dir).name |
176 tmp_stdout = open(tmp_out, 'wb') | 176 tmp_stdout = open(tmp_out, 'wb') |
177 tmp_err = tempfile.NamedTemporaryFile(dir=tmp_dir).name | 177 tmp_err = tempfile.NamedTemporaryFile(dir=tmp_dir).name |
182 rc = proc.wait() | 182 rc = proc.wait() |
183 if rc != 0: | 183 if rc != 0: |
184 error_message = get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout) | 184 error_message = get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout) |
185 stop_err(error_message) | 185 stop_err(error_message) |
186 # Move the output HTML file to the output dataset path. | 186 # Move the output HTML file to the output dataset path. |
187 output_html_filename = 'multiGPS_%s_results.html' % os.path.split(args.output_html_files_path)[1] | 187 output_html_filename = 'multiGPS_%s_results.html' % os.path.split(options.output_html_files_path)[1] |
188 shutil.move(os.path.join(args.output_html_files_path, output_html_filename), args.output_html_path) | 188 shutil.move(os.path.join(options.output_html_files_path, output_html_filename), options.output_html_path) |
189 except Exception, e: | 189 except Exception, e: |
190 stop_err('Error running MultiGPS\n%s\n' % str(e)) | 190 stop_err('Error running MultiGPS\n%s\n' % str(e)) |
191 finally: | 191 finally: |
192 if os.path.exists(tmp_dir): | 192 if os.path.exists(tmp_dir): |
193 shutil.rmtree(tmp_dir) | 193 shutil.rmtree(tmp_dir) |