changeset 174:b0ca3591242e draft

Uploaded
author greg
date Thu, 25 Jan 2018 11:53:15 -0500
parents 843bfa2dff2c
children 2b6b5e2769f5
files ideas.R
diffstat 1 files changed, 60 insertions(+), 49 deletions(-) [+]
line wrap: on
line diff
--- a/ideas.R	Thu Jan 25 11:53:07 2018 -0500
+++ b/ideas.R	Thu Jan 25 11:53:15 2018 -0500
@@ -4,43 +4,40 @@
 suppressPackageStartupMessages(library("optparse"))
 
 option_list <- list(
-    make_option(c("--burnin_num"), action="store", dest="burnin_num", type="integer", help="Number of burnin steps"),
-    make_option(c("--bychr"), action="store_true", dest="bychr", default=FALSE, help="Output chromosomes in separate files"),
-    make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", default=NULL, help="Chromosome windows positions file"),
-    make_option(c("--chromosome_windows"), action="store", dest="chromosome_windows", default=NULL, help="Windows positions by chroms config file"),
-    make_option(c("--hp"), action="store_true", dest="hp", default=FALSE, help="Discourage state transition across chromosomes"),
-    make_option(c("--initial_states"), action="store", dest="initial_states", type="integer", default=NULL, help="Initial number of states"),
-    make_option(c("--input"), action="store", dest="input", help="IdeasPre input dataset"),
-    make_option(c("--input_files_path"), action="store", dest="input_files_path", help="IdeasPre input dataset extra files path"),
-    make_option(c("--ideas_input_config"), action="store", dest="ideas_input_config", help="IDEAS_input_config file"),
-    make_option(c("--log2"), action="store", dest="log2", type="double", default=NULL, help="log2 transformation"),
-    make_option(c("--maxerr"), action="store", dest="maxerr", type="double", default=NULL, help="Maximum standard deviation for the emission Gaussian distribution"),
-    make_option(c("--max_cell_type_clusters"), action="store", dest="max_cell_type_clusters", type="integer", default=NULL, help="Maximum number of cell type clusters allowed"),
-    make_option(c("--max_position_classes"), action="store", dest="max_position_classes", type="integer", default=NULL, help="Maximum number of position classes to be inferred"),
-    make_option(c("--max_states"), action="store", dest="max_states", type="double", default=NULL, help="Maximum number of states to be inferred"),
-    make_option(c("--mcmc_num"), action="store", dest="mcmc_num", type="integer", help="Number of maximization steps"),
-    make_option(c("--minerr"), action="store", dest="minerr", type="double", default=NULL, help="Minimum standard deviation for the emission Gaussian distribution"),
-    make_option(c("--output_log"), action="store", dest="output_log", default=NULL, help="Output log file path"),
-    make_option(c("--prior_concentration"), action="store", dest="prior_concentration", type="double", default=NULL, help="Prior concentration"),
-    make_option(c("--project_name"), action="store", dest="project_name", help="Outputs will have this base name"),
-    make_option(c("--rseed"), action="store", dest="rseed", type="integer", help="Seed for IDEAS model initialization"),
-    make_option(c("--save_ideas_log"), action="store", dest="save_ideas_log", default=NULL, help="Flag to save IDEAS process log"),
-    make_option(c("--standardize_datasets"), action="store_true", dest="standardize_datasets", default=FALSE, help="Standardize all datasets"),
-    make_option(c("--thread"), action="store", dest="thread", type="integer", help="Process threads"),
-    make_option(c("--training_iterations"), action="store", dest="training_iterations", type="integer", default=NULL, help="Number of training iterations"),
-    make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training iterations")
+            make_option(c("--burnin_num"), action="store", dest="burnin_num", type="integer", help="Number of burnin steps"),
+            make_option(c("--bychr"), action="store_true", dest="bychr", default=FALSE, help="Output chromosomes in separate files"),
+            make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", default=NULL, help="Chromosome windows positions file"),
+            make_option(c("--chromosome_windows"), action="store", dest="chromosome_windows", default=NULL, help="Windows positions by chroms config file"),
+            make_option(c("--hp"), action="store_true", dest="hp", default=FALSE, help="Discourage state transition across chromosomes"),
+            make_option(c("--initial_states"), action="store", dest="initial_states", type="integer", default=NULL, help="Initial number of states"),
+            make_option(c("--input"), action="store", dest="input", help="IdeasPre input dataset"),
+            make_option(c("--input_files_path"), action="store", dest="input_files_path", help="IdeasPre input dataset extra files path"),
+            make_option(c("--ideas_input_config"), action="store", dest="ideas_input_config", help="IDEAS_input_config file"),
+            make_option(c("--log2"), action="store", dest="log2", type="double", default=NULL, help="log2 transformation"),
+            make_option(c("--maxerr"), action="store", dest="maxerr", type="double", default=NULL, help="Maximum standard deviation for the emission Gaussian distribution"),
+            make_option(c("--max_cell_type_clusters"), action="store", dest="max_cell_type_clusters", type="integer", default=NULL, help="Maximum number of cell type clusters allowed"),
+            make_option(c("--max_position_classes"), action="store", dest="max_position_classes", type="integer", default=NULL, help="Maximum number of position classes to be inferred"),
+            make_option(c("--max_states"), action="store", dest="max_states", type="double", default=NULL, help="Maximum number of states to be inferred"),
+            make_option(c("--mcmc_num"), action="store", dest="mcmc_num", type="integer", help="Number of maximization steps"),
+            make_option(c("--minerr"), action="store", dest="minerr", type="double", default=NULL, help="Minimum standard deviation for the emission Gaussian distribution"),
+            make_option(c("--output_dir"), action="store", dest="output_dir", help="Output directory, used only if job ends in error and process log needs saving"),
+            make_option(c("--output_log"), action="store", dest="output_log", default=NULL, help="Output log file path"),
+            make_option(c("--prior_concentration"), action="store", dest="prior_concentration", type="double", default=NULL, help="Prior concentration"),
+            make_option(c("--project_name"), action="store", dest="project_name", help="Outputs will have this base name"),
+            make_option(c("--rseed"), action="store", dest="rseed", type="integer", help="Seed for IDEAS model initialization"),
+            make_option(c("--save_ideas_log"), action="store", dest="save_ideas_log", default=NULL, help="Flag to save IDEAS process log"),
+            make_option(c("--standardize_datasets"), action="store_true", dest="standardize_datasets", default=FALSE, help="Standardize all datasets"),
+            make_option(c("--thread"), action="store", dest="thread", type="integer", help="Process threads"),
+            make_option(c("--training_iterations"), action="store", dest="training_iterations", type="integer", default=NULL, help="Number of training iterations"),
+            make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training iterations")
 )
 
 parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
 args <- parse_args(parser, positional_arguments=TRUE)
 opt <- args$options
 
-add_output_redirect <- function(cmd, save_ideas_log, output_log, default_log_name) {
-    if (is.null(save_ideas_log)) {
-        new_cmd = c(cmd, "&>>", default_log_name);
-    }else {
-        new_cmd = c(cmd, "&>>", output_log);
-    }
+add_output_redirect <- function(cmd, output_log) {
+    new_cmd = c(cmd, "&>>", output_log);
     return(paste(new_cmd, collapse=" "));
 }
 
@@ -351,22 +348,33 @@
     }
 }
 
-run_cmd <- function(cmd, save_ideas_log, output_log, default_log_name) {
+run_cmd <- function(cmd, save_ideas_log, output_log, output_dir) {
+    cat("save_ideas_log: ", save_ideas_log, "\n");
+    cat("output_log: ", output_log, "\n");
+    cat("output_dir: ", output_dir, "\n");
+    cat("\nRunning cmd:\n", cmd, "\n\n");
     rc = system(cmd);
     if (rc != 0) {
         if (is.null(save_ideas_log)) {
-            file.rename(default_log_name, output_log);
+            to_path = paste(output_dir, output_log, sep="/");
+            file.rename(output_log, to_path);
         }
         quit(save="no", status=rc);
     }
 }
 
 # Initialize values.
-default_log_name = "ideas_log.txt";
+if (is.null(opt$save_ideas_log)) {
+    output_log = "ideas_log.txt";
+} else {
+    output_log = opt$output_log;
+}
 # Get full path of chromosomes.bed if not NULL.
 chrom_bed_input = get_file_path(opt$input_files_path, opt$chrom_bed_input);
+cat("chrom_bed_input: ", chrom_bed_input, "\n");
 # Get full path of chromosome_windows.txt if not NULL.
 chromosome_windows = get_file_path(opt$input_files_path, opt$chromosome_windows);
+cat("chromosome_windows: ", chromosome_windows, "\n");
 if (is.null(chromosome_windows)) {
     windows_by_chrom = NULL;
 } else {
@@ -374,11 +382,14 @@
     windows_by_chrom = get_windows_by_chrom(chromosome_windows);
 }
 ideas_input_config = get_file_path(opt$input_files_path, opt$ideas_input_config);
+cat("ideas_input_config: ", ideas_input_config, "\n");
 base_cmd = get_base_cmd(ideas_input_config, chrom_bed_input, opt$training_iterations, opt$bychr, opt$hp,
-    opt$standardize_datasets, opt$log2, opt$max_states, opt$initial_states, opt$max_position_classes,
-    opt$max_cell_type_clusters, opt$prior_concentration, opt$burnin_num, opt$mcmc_num, opt$minerr,
-    opt$maxerr, opt$rseed, opt$thread);
+            opt$standardize_datasets, opt$log2, opt$max_states, opt$initial_states, opt$max_position_classes,
+            opt$max_cell_type_clusters, opt$prior_concentration, opt$burnin_num, opt$mcmc_num, opt$minerr,
+            opt$maxerr, opt$rseed, opt$thread);
+cat("base_cmd: ", base_cmd, "\n");
 output_base_name = opt$project_name;
+cat("output_base_name: ", output_base_name, "\n");
 
 if (is.null(opt$training_iterations)) {
     # Not performing training.
@@ -386,8 +397,8 @@
         # Not performing windows by chromosome.
         output_name = output_base_name;
         cmd = paste(base_cmd, "-o", output_name, sep=" ");
-        cmd = add_output_redirect(cmd, opt$save_ideas_log, opt$output_log, default_log_name);
-        run_cmd(cmd, opt$save_ideas_log, opt$output_log, default_log_name);
+        cmd = add_output_redirect(cmd, output_log);
+        run_cmd(cmd, opt$save_ideas_log, output_log, opt$output_dir);
     } else {
         # Performing windows by chromosome.
         for (i in 1:length(windows_by_chrom)) {
@@ -399,8 +410,8 @@
             output_name = paste(output_base_name, chrom, sep=".");
             cmd = paste(base_cmd, "-inv", window_start, window_end, sep=" ");
             cmd = paste(cmd, "-o", output_name, sep=" ");
-            cmd = add_output_redirect(cmd, opt$save_ideas_log, opt$output_log, default_log_name);
-            run_cmd(cmd, opt$save_ideas_log, opt$output_log, default_log_name);
+            cmd = add_output_redirect(cmd, output_log);
+            run_cmd(cmd, opt$save_ideas_log, output_log, opt$output_dir);
         }
     }
 } else {
@@ -409,8 +420,8 @@
     output_profile0 = paste(output_base_name, "profile0", sep=".");
     for (i in 1:opt$training_iterations) {
         cmd = paste(base_cmd, "-o", paste(output_base_name, ".tmp.", i, sep=""), sep=" ");
-        cmd = add_output_redirect(cmd, opt$save_ideas_log, opt$output_log, default_log_name);
-        run_cmd(cmd, opt$save_ideas_log, opt$output_log, default_log_name);
+        cmd = add_output_redirect(cmd, output_log);
+        run_cmd(cmd, opt$save_ideas_log, output_log, opt$output_dir);
     }
     tpara = combine_state(paste(output_base_name, "tmp", (1:opt$training_iterations), "para", sep="."), mycut=0.5);
     write.table(tpara$profile, output_profile0, quote=F, row.names=F, col.names=F);
@@ -423,15 +434,15 @@
     base_cmd = paste(base_cmd, "-otherpara", output_para0[[1]], output_profile0[[1]], sep=" ");
     if (is.null(windows_by_chrom)) {
         cmd = c(base_cmd, "-o", output_base_name);
-        cmd = add_output_redirect(cmd, opt$save_ideas_log, opt$output_log, default_log_name);
-        run_cmd(cmd, opt$save_ideas_log, opt$output_log, default_log_name);
+        cmd = add_output_redirect(cmd, output_log);
+        run_cmd(cmd, opt$save_ideas_log, output_log, opt$output_dir);
     } else {
         # Performing windows by chromosome.
         if (length(windows_by_chrom) == 1) {
             output_name = paste(output_base_name, i, sep=".");
             cmd = c(base_cmd, "-o", output_name);
-            cmd = add_output_redirect(cmd, opt$save_ideas_log, opt$output_log, default_log_name);
-            run_cmd(cmd, opt$save_ideas_log, opt$output_log, default_log_name);
+            cmd = add_output_redirect(cmd, output_log);
+            run_cmd(cmd, opt$save_ideas_log, output_log, opt$output_dir);
         } else {
             for (i in 1:length(windows_by_chrom)) {
                 line = windows_by_chrom[i];
@@ -442,8 +453,8 @@
                 cmd = paste(base_cmd, "-inv", window_start, window_end, sep=" ");
                 output_name = paste(output_base_name, chrom, sep=".");
                 cmd = paste(cmd, "-o", output_name, sep=" ");
-                cmd = add_output_redirect(cmd, opt$save_ideas_log, opt$output_log, default_log_name);
-                run_cmd(cmd, opt$save_ideas_log, opt$output_log, default_log_name);
+                cmd = add_output_redirect(cmd, output_log);
+                run_cmd(cmd, opt$save_ideas_log, output_log, opt$output_dir);
             }
         }
     }