Mercurial > repos > greg > multigps
changeset 7:afba8c2a3b32 draft
Uploaded
author | greg |
---|---|
date | Sat, 02 Jan 2016 14:25:59 -0500 |
parents | 155190df92fb |
children | 949d2dedaedc |
files | multigps.py |
diffstat | 1 files changed, 94 insertions(+), 94 deletions(-) [+] |
line wrap: on
line diff
--- a/multigps.py Sat Jan 02 14:09:44 2016 -0500 +++ b/multigps.py Sat Jan 02 14:25:59 2016 -0500 @@ -1,4 +1,4 @@ -import argparse +import optparse import os import shutil import subprocess @@ -57,119 +57,119 @@ sys.stderr.write(msg) sys.exit(1) -parser = argparse.ArgumentParser() -parser.add_argument('--input_item', dest='input_items', action='append', nargs=7, help="Input datasets and associated parameters") -parser.add_argument('--threads', dest='threads', type=int, default=4, help='The number of threads to run') -parser.add_argument('--multigps_jar', dest='multigps_jar', help='Path to the MultiGPS jar file') -parser.add_argument('--geninfo', dest='geninfo', help='File listing the lengths of all chromosomes') -parser.add_argument('--use_motif', dest='use_motif', help='Perform motif-finding or use a motif-prior') -parser.add_argument('--seq_dir', dest='seq_dir', default=None, help='Directory containing fasta files corresponding to every named chromosome') -parser.add_argument('--positional_prior', dest='positional_prior', default=None, help='Perform inter-experiment positional prior') -parser.add_argument('--events_shared_probability', dest='events_shared_probability', type=float, default=0.9, help='Probability that events are shared across conditions') -parser.add_argument('--motifs', dest='motifs', default=None, help='Perform motif-finding and motif priors') -parser.add_argument('--motif_finding_only', dest='motif_finding_only', default=None, help='Perform motif-finding only') -parser.add_argument('--num_motifs', dest='num_motifs', type=int, default=None, help='Number of motifs MEME should find for each condition') -parser.add_argument('--mememinw', dest='mememinw', type=int, default=None, help='Minimum motif width for MEME') -parser.add_argument('--mememaxw', dest='mememaxw', type=int, default=None, help='Maximum motif width for MEME') -parser.add_argument('--fixedpb', dest='fixedpb', type=int, default=None, help='Fixed per-base limit') -parser.add_argument('--poissongausspb', dest='poissongausspb', type=int, default=None, help='Poisson threshold for filtering per base') -parser.add_argument('--non_unique_reads', dest='non_unique_reads', default=None, help='Use non-unique reads') -parser.add_argument('--minqvalue', dest='minqvalue', type=int, default=None, help='Minimum Q-value (corrected p-value) of reported binding events') -parser.add_argument('--minfold', dest='minfold', type=int, default=None, help='Minimum event fold-change vs scaled control') -parser.add_argument('--diff_enrichment_tests', dest='diff_enrichment_tests', help='Run differential enrichment tests') -parser.add_argument('--edgerod', dest='edgerod', type=int, default=None, help='EdgeR over-dispersion parameter value') -parser.add_argument('--diffp', dest='diffp', type=int, default=None, help='Minimum p-value for reporting differential enrichment') -parser.add_argument('--noscaling', dest='noscaling', default=None, help='Do not use signal vs control scaling') -parser.add_argument('--medianscale', dest='medianscale', default=None, help='Use the median signal/control ratio as the scaling factor') -parser.add_argument('--sesscale', dest='sesscale', default=None, help='Estimate scaling factor by SES') -parser.add_argument('--scalewin', dest='scalewin', type=int, default=None, help='Window size for estimating scaling ratios') -parser.add_argument('--max_training_rounds', dest='max_training_rounds', type=int, default=None, help='Maximum number of training rounds for updating binding event read distributions') -parser.add_argument('--exclude_file', dest='exclude_file', default=None, help='File containing a set of regions to ignore during MultiGPS training') -parser.add_argument('--binding_model_updates', dest='binding_model_updates', default=None, help='Perform binding model updates') -parser.add_argument('--minmodelupdateevents', dest='minmodelupdateevents', type=int, default=None, help='Minimum number of events to support an update of the read distribution') -parser.add_argument('--binding_model_smoothing', dest='binding_model_smoothing', default=None, help='Binding model smoothing') -parser.add_argument('--spline_smooth', dest='spline_smooth', type=int, default=None, help='Binding model smoothing value') -parser.add_argument('--gauss_smooth', dest='gauss_smooth', type=int, default=None, help='Gaussian smoothing std dev') -parser.add_argument('--joint_in_model', dest='joint_in_model', default=None, help='Allow joint events in model updates') -parser.add_argument('--ml_config_not_shared', dest='ml_config_not_shared', default=None, help='Share component configs in the ML step') -parser.add_argument('--output_html_path', dest='output_html_path', help='Output html results file') -parser.add_argument('--output_html_files_path', dest='output_html_files_path', help='Output html extra files') -parser.add_argument('--output_process_path', dest='output_process_path', default=None, help='Output file for capturing stdout') -args = parser.parse_args() +parser = optparse.OptionParser() +parser.add_option('--input_item', dest='input_items', action='append', nargs=7, help="Input datasets and associated parameters") +parser.add_option('--threads', dest='threads', type="int", default=4, help='The number of threads to run') +parser.add_option('--multigps_jar', dest='multigps_jar', help='Path to the MultiGPS jar file') +parser.add_option('--geninfo', dest='geninfo', help='File listing the lengths of all chromosomes') +parser.add_option('--use_motif', dest='use_motif', help='Perform motif-finding or use a motif-prior') +parser.add_option('--seq_dir', dest='seq_dir', default=None, help='Directory containing fasta files corresponding to every named chromosome') +parser.add_option('--positional_prior', dest='positional_prior', default=None, help='Perform inter-experiment positional prior') +parser.add_option('--events_shared_probability', dest='events_shared_probability', type=float, default=0.9, help='Probability that events are shared across conditions') +parser.add_option('--motifs', dest='motifs', default=None, help='Perform motif-finding and motif priors') +parser.add_option('--motif_finding_only', dest='motif_finding_only', default=None, help='Perform motif-finding only') +parser.add_option('--num_motifs', dest='num_motifs', type="int", default=None, help='Number of motifs MEME should find for each condition') +parser.add_option('--mememinw', dest='mememinw', type="int", default=None, help='Minimum motif width for MEME') +parser.add_option('--mememaxw', dest='mememaxw', type="int", default=None, help='Maximum motif width for MEME') +parser.add_option('--fixedpb', dest='fixedpb', type="int", default=None, help='Fixed per-base limit') +parser.add_option('--poissongausspb', dest='poissongausspb', type="int", default=None, help='Poisson threshold for filtering per base') +parser.add_option('--non_unique_reads', dest='non_unique_reads', default=None, help='Use non-unique reads') +parser.add_option('--minqvalue', dest='minqvalue', type="int", default=None, help='Minimum Q-value (corrected p-value) of reported binding events') +parser.add_option('--minfold', dest='minfold', type="int", default=None, help='Minimum event fold-change vs scaled control') +parser.add_option('--diff_enrichment_tests', dest='diff_enrichment_tests', help='Run differential enrichment tests') +parser.add_option('--edgerod', dest='edgerod', type="int", default=None, help='EdgeR over-dispersion parameter value') +parser.add_option('--diffp', dest='diffp', type="int", default=None, help='Minimum p-value for reporting differential enrichment') +parser.add_option('--noscaling', dest='noscaling', default=None, help='Do not use signal vs control scaling') +parser.add_option('--medianscale', dest='medianscale', default=None, help='Use the median signal/control ratio as the scaling factor') +parser.add_option('--sesscale', dest='sesscale', default=None, help='Estimate scaling factor by SES') +parser.add_option('--scalewin', dest='scalewin', type="int", default=None, help='Window size for estimating scaling ratios') +parser.add_option('--max_training_rounds', dest='max_training_rounds', type="int", default=None, help='Maximum number of training rounds for updating binding event read distributions') +parser.add_option('--exclude_file', dest='exclude_file', default=None, help='File containing a set of regions to ignore during MultiGPS training') +parser.add_option('--binding_model_updates', dest='binding_model_updates', default=None, help='Perform binding model updates') +parser.add_option('--minmodelupdateevents', dest='minmodelupdateevents', type="int", default=None, help='Minimum number of events to support an update of the read distribution') +parser.add_option('--binding_model_smoothing', dest='binding_model_smoothing', default=None, help='Binding model smoothing') +parser.add_option('--spline_smooth', dest='spline_smooth', type="int", default=None, help='Binding model smoothing value') +parser.add_option('--gauss_smooth', dest='gauss_smooth', type="int", default=None, help='Gaussian smoothing std dev') +parser.add_option('--joint_in_model', dest='joint_in_model', default=None, help='Allow joint events in model updates') +parser.add_option('--ml_config_not_shared', dest='ml_config_not_shared', default=None, help='Share component configs in the ML step') +parser.add_option('--output_html_path', dest='output_html_path', help='Output html results file') +parser.add_option('--output_html_files_path', dest='output_html_files_path', help='Output html extra files') +parser.add_option('--output_process_path', dest='output_process_path', default=None, help='Output file for capturing stdout') +options, args = parser.parse_args() try: # Preparation. tmp_dir = tempfile.mkdtemp(prefix="tmp-multigps-") - generate_design_file(args.input_items, DESIGN_FILE) + generate_design_file(options.input_items, DESIGN_FILE) # Build the command line. - cmd_list = ['java -jar %s --verbose' % args.multigps_jar] - cmd_list.append('--threads %d' % args.threads) - cmd_list.append('--geninfo %s' % args.geninfo) - if args.use_motif == 'yes': - cmd_list.append('--seq %s' % args.seq_dir) - if args.positional_prior == 'no': + cmd_list = ['java -jar %s --verbose' % options.multigps_jar] + cmd_list.append('--threads %d' % options.threads) + cmd_list.append('--geninfo %s' % options.geninfo) + if options.use_motif == 'yes': + cmd_list.append('--seq %s' % options.seq_dir) + if options.positional_prior == 'no': cmd_list.append('--noposprior') - cmd_list.append('--probshared %3f' % args.events_shared_probability) - if args.motifs == 'no': + cmd_list.append('--probshared %3f' % options.events_shared_probability) + if options.motifs == 'no': cmd_list.append('--nomotifs') - if args.motif_finding_only == 'yes': + if options.motif_finding_only == 'yes': cmd_list.append('--nomotifprior') - if args.num_motifs is not None: - cmd_list.append('--memenmotifs %d' % args.num_motifs) - if args.mememinw is not None: - cmd_list.append('--mememinw %d' % args.mememinw) - if args.mememaxw is not None: - cmd_list.append('--mememaxw %d' % args.mememaxw) - if args.fixedpb is not None: - cmd_list.append('--fixedpb %d' % args.fixedpb) - if args.poissongausspb is not None: - cmd_list.append('--poissongausspb %d' % args.poissongausspb) - if args.non_unique_reads == 'yes': + if options.num_motifs is not None: + cmd_list.append('--memenmotifs %d' % options.num_motifs) + if options.mememinw is not None: + cmd_list.append('--mememinw %d' % options.mememinw) + if options.mememaxw is not None: + cmd_list.append('--mememaxw %d' % options.mememaxw) + if options.fixedpb is not None: + cmd_list.append('--fixedpb %d' % options.fixedpb) + if options.poissongausspb is not None: + cmd_list.append('--poissongausspb %d' % options.poissongausspb) + if options.non_unique_reads == 'yes': cmd_list.append('--nonunique') - if args.minqvalue is not None: - cmd_list.append('--q %d' % args.minqvalue) - if args.minfold is not None: - cmd_list.append('--minfold %d' % args.minfold) - if args.diff_enrichment_tests == 'no': + if options.minqvalue is not None: + cmd_list.append('--q %d' % options.minqvalue) + if options.minfold is not None: + cmd_list.append('--minfold %d' % options.minfold) + if options.diff_enrichment_tests == 'no': cmd_list.append('--nodifftests') - if args.edgerod is not None: - cmd_list.append('--edgerod %d' % args.edgerod) - if args.diffp is not None: - cmd_list.append('--diffp %d' % args.diffp) - if args.noscaling == 'no': + if options.edgerod is not None: + cmd_list.append('--edgerod %d' % options.edgerod) + if options.diffp is not None: + cmd_list.append('--diffp %d' % options.diffp) + if options.noscaling == 'no': cmd_list.append('--noscaling') - if args.medianscale == "yes": + if options.medianscale == "yes": cmd_list.append('--medianscale') - if args.sesscale == "yes": + if options.sesscale == "yes": cmd_list.append('--sesscale') - if args.scalewin is not None: - cmd_list.append('--scalewin %d' % args.scalewin) - if args.max_training_rounds is not None: - cmd_list.append('--r %d' % args.max_training_rounds) - if args.exclude_file not in [None, 'None']: - cmd_list.append('--exclude %s' % args.exclude_file) - if args.binding_model_updates == 'no': + if options.scalewin is not None: + cmd_list.append('--scalewin %d' % options.scalewin) + if options.max_training_rounds is not None: + cmd_list.append('--r %d' % options.max_training_rounds) + if options.exclude_file not in [None, 'None']: + cmd_list.append('--exclude %s' % options.exclude_file) + if options.binding_model_updates == 'no': cmd_list.append('--nomodelupdate') - if args.minmodelupdateevents is not None: - cmd_list.append('--minmodelupdateevents %d' % args.minmodelupdateevents) - if args.binding_model_smoothing == 'yes': - if args.spline_smooth is not None: - cmd_list.append('--splinesmoothparam %d' % args.spline_smooth) + if options.minmodelupdateevents is not None: + cmd_list.append('--minmodelupdateevents %d' % options.minmodelupdateevents) + if options.binding_model_smoothing == 'yes': + if options.spline_smooth is not None: + cmd_list.append('--splinesmoothparam %d' % options.spline_smooth) else: cmd_list.append('--nomodelsmoothing') - if args.gauss_smooth is not None: + if options.gauss_smooth is not None: cmd_list.append('--gaussmodelsmoothing') - cmd_list.append('--gausssmoothparam %d' % args.gauss_smooth) - if args.joint_in_model == 'yes': + cmd_list.append('--gausssmoothparam %d' % options.gauss_smooth) + if options.joint_in_model == 'yes': cmd_list.append('--jointinmodel') - if args.ml_config_not_shared == 'no': + if options.ml_config_not_shared == 'no': cmd_list.append('--mlconfignotshared') cmd_list.append('--design %s' % DESIGN_FILE) - cmd_list.append('--out %s' % args.output_html_files_path) - if args.output_process_path is None: + cmd_list.append('--out %s' % options.output_html_files_path) + if options.output_process_path is None: cmd_list.append('2>/dev/null') else: - cmd_list.append('2>%s' % args.output_process_path) + cmd_list.append('2>%s' % options.output_process_path) cmd = ' '.join(cmd_list) # Define command response buffers. tmp_out = tempfile.NamedTemporaryFile(dir=tmp_dir).name @@ -184,8 +184,8 @@ error_message = get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout) stop_err(error_message) # Move the output HTML file to the output dataset path. - output_html_filename = 'multiGPS_%s_results.html' % os.path.split(args.output_html_files_path)[1] - shutil.move(os.path.join(args.output_html_files_path, output_html_filename), args.output_html_path) + output_html_filename = 'multiGPS_%s_results.html' % os.path.split(options.output_html_files_path)[1] + shutil.move(os.path.join(options.output_html_files_path, output_html_filename), options.output_html_path) except Exception, e: stop_err('Error running MultiGPS\n%s\n' % str(e)) finally: