Mercurial > repos > greg > multigps
comparison multigps.py @ 5:7b15099378a7 draft
Uploaded
| author | greg |
|---|---|
| date | Sat, 02 Jan 2016 14:07:37 -0500 |
| parents | |
| children | afba8c2a3b32 |
comparison
equal
deleted
inserted
replaced
| 4:32cff4d1d0ff | 5:7b15099378a7 |
|---|---|
| 1 import argparse | |
| 2 import os | |
| 3 import shutil | |
| 4 import subprocess | |
| 5 import sys | |
| 6 import tempfile | |
| 7 | |
| 8 BUFF_SIZE = 1048576 | |
| 9 DESIGN_FILE = 'design.tabular' | |
| 10 | |
| 11 | |
| 12 def generate_design_file(input_items, filename): | |
| 13 design_file = open(filename, 'wb') | |
| 14 for items in input_items: | |
| 15 filename, ext, signal_control, condition_name, replicate_name, read_distribution_file, fixed_read = items | |
| 16 # MultiGPS version 0.5 does not support the newer scidx datatype. | |
| 17 if ext == 'scidx': | |
| 18 datatype = 'IDX' | |
| 19 else: | |
| 20 datatype = ext.upper() | |
| 21 design_file.write('%s\n' % '\t'.join([filename, signal_control, datatype, condition_name, replicate_name, read_distribution_file, fixed_read])) | |
| 22 design_file.close() | |
| 23 | |
| 24 | |
| 25 def get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout, include_stdout=False): | |
| 26 tmp_stderr.close() | |
| 27 # Get stderr, allowing for case where it's very large. | |
| 28 tmp_stderr = open(tmp_err, 'rb') | |
| 29 stderr_str = '' | |
| 30 buffsize = BUFF_SIZE | |
| 31 try: | |
| 32 while True: | |
| 33 stderr_str += tmp_stderr.read(buffsize) | |
| 34 if not stderr_str or len(stderr_str) % buffsize != 0: | |
| 35 break | |
| 36 except OverflowError: | |
| 37 pass | |
| 38 tmp_stderr.close() | |
| 39 if include_stdout: | |
| 40 tmp_stdout = open(tmp_out, 'rb') | |
| 41 stdout_str = '' | |
| 42 buffsize = BUFF_SIZE | |
| 43 try: | |
| 44 while True: | |
| 45 stdout_str += tmp_stdout.read(buffsize) | |
| 46 if not stdout_str or len(stdout_str) % buffsize != 0: | |
| 47 break | |
| 48 except OverflowError: | |
| 49 pass | |
| 50 tmp_stdout.close() | |
| 51 if include_stdout: | |
| 52 return 'STDOUT\n%s\n\nSTDERR\n%s\n' % (stdout_str, stderr_str) | |
| 53 return stderr_str | |
| 54 | |
| 55 | |
| 56 def stop_err(msg): | |
| 57 sys.stderr.write(msg) | |
| 58 sys.exit(1) | |
| 59 | |
| 60 parser = argparse.ArgumentParser() | |
| 61 parser.add_argument('--input_item', dest='input_items', action='append', nargs=7, help="Input datasets and associated parameters") | |
| 62 parser.add_argument('--threads', dest='threads', type=int, default=4, help='The number of threads to run') | |
| 63 parser.add_argument('--multigps_jar', dest='multigps_jar', help='Path to the MultiGPS jar file') | |
| 64 parser.add_argument('--geninfo', dest='geninfo', help='File listing the lengths of all chromosomes') | |
| 65 parser.add_argument('--use_motif', dest='use_motif', help='Perform motif-finding or use a motif-prior') | |
| 66 parser.add_argument('--seq_dir', dest='seq_dir', default=None, help='Directory containing fasta files corresponding to every named chromosome') | |
| 67 parser.add_argument('--positional_prior', dest='positional_prior', default=None, help='Perform inter-experiment positional prior') | |
| 68 parser.add_argument('--events_shared_probability', dest='events_shared_probability', type=float, default=0.9, help='Probability that events are shared across conditions') | |
| 69 parser.add_argument('--motifs', dest='motifs', default=None, help='Perform motif-finding and motif priors') | |
| 70 parser.add_argument('--motif_finding_only', dest='motif_finding_only', default=None, help='Perform motif-finding only') | |
| 71 parser.add_argument('--num_motifs', dest='num_motifs', type=int, default=None, help='Number of motifs MEME should find for each condition') | |
| 72 parser.add_argument('--mememinw', dest='mememinw', type=int, default=None, help='Minimum motif width for MEME') | |
| 73 parser.add_argument('--mememaxw', dest='mememaxw', type=int, default=None, help='Maximum motif width for MEME') | |
| 74 parser.add_argument('--fixedpb', dest='fixedpb', type=int, default=None, help='Fixed per-base limit') | |
| 75 parser.add_argument('--poissongausspb', dest='poissongausspb', type=int, default=None, help='Poisson threshold for filtering per base') | |
| 76 parser.add_argument('--non_unique_reads', dest='non_unique_reads', default=None, help='Use non-unique reads') | |
| 77 parser.add_argument('--minqvalue', dest='minqvalue', type=int, default=None, help='Minimum Q-value (corrected p-value) of reported binding events') | |
| 78 parser.add_argument('--minfold', dest='minfold', type=int, default=None, help='Minimum event fold-change vs scaled control') | |
| 79 parser.add_argument('--diff_enrichment_tests', dest='diff_enrichment_tests', help='Run differential enrichment tests') | |
| 80 parser.add_argument('--edgerod', dest='edgerod', type=int, default=None, help='EdgeR over-dispersion parameter value') | |
| 81 parser.add_argument('--diffp', dest='diffp', type=int, default=None, help='Minimum p-value for reporting differential enrichment') | |
| 82 parser.add_argument('--noscaling', dest='noscaling', default=None, help='Do not use signal vs control scaling') | |
| 83 parser.add_argument('--medianscale', dest='medianscale', default=None, help='Use the median signal/control ratio as the scaling factor') | |
| 84 parser.add_argument('--sesscale', dest='sesscale', default=None, help='Estimate scaling factor by SES') | |
| 85 parser.add_argument('--scalewin', dest='scalewin', type=int, default=None, help='Window size for estimating scaling ratios') | |
| 86 parser.add_argument('--max_training_rounds', dest='max_training_rounds', type=int, default=None, help='Maximum number of training rounds for updating binding event read distributions') | |
| 87 parser.add_argument('--exclude_file', dest='exclude_file', default=None, help='File containing a set of regions to ignore during MultiGPS training') | |
| 88 parser.add_argument('--binding_model_updates', dest='binding_model_updates', default=None, help='Perform binding model updates') | |
| 89 parser.add_argument('--minmodelupdateevents', dest='minmodelupdateevents', type=int, default=None, help='Minimum number of events to support an update of the read distribution') | |
| 90 parser.add_argument('--binding_model_smoothing', dest='binding_model_smoothing', default=None, help='Binding model smoothing') | |
| 91 parser.add_argument('--spline_smooth', dest='spline_smooth', type=int, default=None, help='Binding model smoothing value') | |
| 92 parser.add_argument('--gauss_smooth', dest='gauss_smooth', type=int, default=None, help='Gaussian smoothing std dev') | |
| 93 parser.add_argument('--joint_in_model', dest='joint_in_model', default=None, help='Allow joint events in model updates') | |
| 94 parser.add_argument('--ml_config_not_shared', dest='ml_config_not_shared', default=None, help='Share component configs in the ML step') | |
| 95 parser.add_argument('--output_html_path', dest='output_html_path', help='Output html results file') | |
| 96 parser.add_argument('--output_html_files_path', dest='output_html_files_path', help='Output html extra files') | |
| 97 parser.add_argument('--output_process_path', dest='output_process_path', default=None, help='Output file for capturing stdout') | |
| 98 args = parser.parse_args() | |
| 99 | |
| 100 try: | |
| 101 # Preparation. | |
| 102 tmp_dir = tempfile.mkdtemp(prefix="tmp-multigps-") | |
| 103 generate_design_file(args.input_items, DESIGN_FILE) | |
| 104 # Build the command line. | |
| 105 cmd_list = ['java -jar %s --verbose' % args.multigps_jar] | |
| 106 cmd_list.append('--threads %d' % args.threads) | |
| 107 cmd_list.append('--geninfo %s' % args.geninfo) | |
| 108 if args.use_motif == 'yes': | |
| 109 cmd_list.append('--seq %s' % args.seq_dir) | |
| 110 if args.positional_prior == 'no': | |
| 111 cmd_list.append('--noposprior') | |
| 112 cmd_list.append('--probshared %3f' % args.events_shared_probability) | |
| 113 if args.motifs == 'no': | |
| 114 cmd_list.append('--nomotifs') | |
| 115 if args.motif_finding_only == 'yes': | |
| 116 cmd_list.append('--nomotifprior') | |
| 117 if args.num_motifs is not None: | |
| 118 cmd_list.append('--memenmotifs %d' % args.num_motifs) | |
| 119 if args.mememinw is not None: | |
| 120 cmd_list.append('--mememinw %d' % args.mememinw) | |
| 121 if args.mememaxw is not None: | |
| 122 cmd_list.append('--mememaxw %d' % args.mememaxw) | |
| 123 if args.fixedpb is not None: | |
| 124 cmd_list.append('--fixedpb %d' % args.fixedpb) | |
| 125 if args.poissongausspb is not None: | |
| 126 cmd_list.append('--poissongausspb %d' % args.poissongausspb) | |
| 127 if args.non_unique_reads == 'yes': | |
| 128 cmd_list.append('--nonunique') | |
| 129 if args.minqvalue is not None: | |
| 130 cmd_list.append('--q %d' % args.minqvalue) | |
| 131 if args.minfold is not None: | |
| 132 cmd_list.append('--minfold %d' % args.minfold) | |
| 133 if args.diff_enrichment_tests == 'no': | |
| 134 cmd_list.append('--nodifftests') | |
| 135 if args.edgerod is not None: | |
| 136 cmd_list.append('--edgerod %d' % args.edgerod) | |
| 137 if args.diffp is not None: | |
| 138 cmd_list.append('--diffp %d' % args.diffp) | |
| 139 if args.noscaling == 'no': | |
| 140 cmd_list.append('--noscaling') | |
| 141 if args.medianscale == "yes": | |
| 142 cmd_list.append('--medianscale') | |
| 143 if args.sesscale == "yes": | |
| 144 cmd_list.append('--sesscale') | |
| 145 if args.scalewin is not None: | |
| 146 cmd_list.append('--scalewin %d' % args.scalewin) | |
| 147 if args.max_training_rounds is not None: | |
| 148 cmd_list.append('--r %d' % args.max_training_rounds) | |
| 149 if args.exclude_file not in [None, 'None']: | |
| 150 cmd_list.append('--exclude %s' % args.exclude_file) | |
| 151 if args.binding_model_updates == 'no': | |
| 152 cmd_list.append('--nomodelupdate') | |
| 153 if args.minmodelupdateevents is not None: | |
| 154 cmd_list.append('--minmodelupdateevents %d' % args.minmodelupdateevents) | |
| 155 if args.binding_model_smoothing == 'yes': | |
| 156 if args.spline_smooth is not None: | |
| 157 cmd_list.append('--splinesmoothparam %d' % args.spline_smooth) | |
| 158 else: | |
| 159 cmd_list.append('--nomodelsmoothing') | |
| 160 if args.gauss_smooth is not None: | |
| 161 cmd_list.append('--gaussmodelsmoothing') | |
| 162 cmd_list.append('--gausssmoothparam %d' % args.gauss_smooth) | |
| 163 if args.joint_in_model == 'yes': | |
| 164 cmd_list.append('--jointinmodel') | |
| 165 if args.ml_config_not_shared == 'no': | |
| 166 cmd_list.append('--mlconfignotshared') | |
| 167 cmd_list.append('--design %s' % DESIGN_FILE) | |
| 168 cmd_list.append('--out %s' % args.output_html_files_path) | |
| 169 if args.output_process_path is None: | |
| 170 cmd_list.append('2>/dev/null') | |
| 171 else: | |
| 172 cmd_list.append('2>%s' % args.output_process_path) | |
| 173 cmd = ' '.join(cmd_list) | |
| 174 # Define command response buffers. | |
| 175 tmp_out = tempfile.NamedTemporaryFile(dir=tmp_dir).name | |
| 176 tmp_stdout = open(tmp_out, 'wb') | |
| 177 tmp_err = tempfile.NamedTemporaryFile(dir=tmp_dir).name | |
| 178 tmp_stderr = open(tmp_err, 'wb') | |
| 179 tmp_filename = tempfile.NamedTemporaryFile(dir=tmp_dir).name | |
| 180 # Execute the command. | |
| 181 proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, stdout=tmp_stdout, shell=True) | |
| 182 rc = proc.wait() | |
| 183 if rc != 0: | |
| 184 error_message = get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout) | |
| 185 stop_err(error_message) | |
| 186 # Move the output HTML file to the output dataset path. | |
| 187 output_html_filename = 'multiGPS_%s_results.html' % os.path.split(args.output_html_files_path)[1] | |
| 188 shutil.move(os.path.join(args.output_html_files_path, output_html_filename), args.output_html_path) | |
| 189 except Exception, e: | |
| 190 stop_err('Error running MultiGPS\n%s\n' % str(e)) | |
| 191 finally: | |
| 192 if os.path.exists(tmp_dir): | |
| 193 shutil.rmtree(tmp_dir) |
