changeset 175:2b6b5e2769f5 draft

Uploaded
author greg
date Thu, 25 Jan 2018 13:38:06 -0500
parents b0ca3591242e
children e0d5669fee03
files ideas.R
diffstat 1 files changed, 29 insertions(+), 52 deletions(-) [+]
line wrap: on
line diff
--- a/ideas.R	Thu Jan 25 11:53:15 2018 -0500
+++ b/ideas.R	Thu Jan 25 13:38:06 2018 -0500
@@ -4,32 +4,31 @@
 suppressPackageStartupMessages(library("optparse"))
 
 option_list <- list(
-            make_option(c("--burnin_num"), action="store", dest="burnin_num", type="integer", help="Number of burnin steps"),
-            make_option(c("--bychr"), action="store_true", dest="bychr", default=FALSE, help="Output chromosomes in separate files"),
-            make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", default=NULL, help="Chromosome windows positions file"),
-            make_option(c("--chromosome_windows"), action="store", dest="chromosome_windows", default=NULL, help="Windows positions by chroms config file"),
-            make_option(c("--hp"), action="store_true", dest="hp", default=FALSE, help="Discourage state transition across chromosomes"),
-            make_option(c("--initial_states"), action="store", dest="initial_states", type="integer", default=NULL, help="Initial number of states"),
-            make_option(c("--input"), action="store", dest="input", help="IdeasPre input dataset"),
-            make_option(c("--input_files_path"), action="store", dest="input_files_path", help="IdeasPre input dataset extra files path"),
-            make_option(c("--ideas_input_config"), action="store", dest="ideas_input_config", help="IDEAS_input_config file"),
-            make_option(c("--log2"), action="store", dest="log2", type="double", default=NULL, help="log2 transformation"),
-            make_option(c("--maxerr"), action="store", dest="maxerr", type="double", default=NULL, help="Maximum standard deviation for the emission Gaussian distribution"),
-            make_option(c("--max_cell_type_clusters"), action="store", dest="max_cell_type_clusters", type="integer", default=NULL, help="Maximum number of cell type clusters allowed"),
-            make_option(c("--max_position_classes"), action="store", dest="max_position_classes", type="integer", default=NULL, help="Maximum number of position classes to be inferred"),
-            make_option(c("--max_states"), action="store", dest="max_states", type="double", default=NULL, help="Maximum number of states to be inferred"),
-            make_option(c("--mcmc_num"), action="store", dest="mcmc_num", type="integer", help="Number of maximization steps"),
-            make_option(c("--minerr"), action="store", dest="minerr", type="double", default=NULL, help="Minimum standard deviation for the emission Gaussian distribution"),
-            make_option(c("--output_dir"), action="store", dest="output_dir", help="Output directory, used only if job ends in error and process log needs saving"),
-            make_option(c("--output_log"), action="store", dest="output_log", default=NULL, help="Output log file path"),
-            make_option(c("--prior_concentration"), action="store", dest="prior_concentration", type="double", default=NULL, help="Prior concentration"),
-            make_option(c("--project_name"), action="store", dest="project_name", help="Outputs will have this base name"),
-            make_option(c("--rseed"), action="store", dest="rseed", type="integer", help="Seed for IDEAS model initialization"),
-            make_option(c("--save_ideas_log"), action="store", dest="save_ideas_log", default=NULL, help="Flag to save IDEAS process log"),
-            make_option(c("--standardize_datasets"), action="store_true", dest="standardize_datasets", default=FALSE, help="Standardize all datasets"),
-            make_option(c("--thread"), action="store", dest="thread", type="integer", help="Process threads"),
-            make_option(c("--training_iterations"), action="store", dest="training_iterations", type="integer", default=NULL, help="Number of training iterations"),
-            make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training iterations")
+    make_option(c("--burnin_num"), action="store", dest="burnin_num", type="integer", help="Number of burnin steps"),
+    make_option(c("--bychr"), action="store_true", dest="bychr", default=FALSE, help="Output chromosomes in separate files"),
+    make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", default=NULL, help="Chromosome windows positions file"),
+    make_option(c("--chromosome_windows"), action="store", dest="chromosome_windows", default=NULL, help="Windows positions by chroms config file"),
+    make_option(c("--hp"), action="store_true", dest="hp", default=FALSE, help="Discourage state transition across chromosomes"),
+    make_option(c("--initial_states"), action="store", dest="initial_states", type="integer", default=NULL, help="Initial number of states"),
+    make_option(c("--input_files_path"), action="store", dest="input_files_path", help="IdeasPre input dataset extra files path"),
+    make_option(c("--ideas_input_config"), action="store", dest="ideas_input_config", help="IDEAS_input_config file"),
+    make_option(c("--log2"), action="store", dest="log2", type="double", default=NULL, help="log2 transformation"),
+    make_option(c("--maxerr"), action="store", dest="maxerr", type="double", default=NULL, help="Maximum standard deviation for the emission Gaussian distribution"),
+    make_option(c("--max_cell_type_clusters"), action="store", dest="max_cell_type_clusters", type="integer", default=NULL, help="Maximum number of cell type clusters allowed"),
+    make_option(c("--max_position_classes"), action="store", dest="max_position_classes", type="integer", default=NULL, help="Maximum number of position classes to be inferred"),
+    make_option(c("--max_states"), action="store", dest="max_states", type="double", default=NULL, help="Maximum number of states to be inferred"),
+    make_option(c("--mcmc_num"), action="store", dest="mcmc_num", type="integer", help="Number of maximization steps"),
+    make_option(c("--minerr"), action="store", dest="minerr", type="double", default=NULL, help="Minimum standard deviation for the emission Gaussian distribution"),
+    make_option(c("--output_dir"), action="store", dest="output_dir", help="Output directory, used only if job ends in error and process log needs saving"),
+    make_option(c("--output_log"), action="store", dest="output_log", default=NULL, help="Output log file path"),
+    make_option(c("--prior_concentration"), action="store", dest="prior_concentration", type="double", default=NULL, help="Prior concentration"),
+    make_option(c("--project_name"), action="store", dest="project_name", help="Outputs will have this base name"),
+    make_option(c("--rseed"), action="store", dest="rseed", type="integer", help="Seed for IDEAS model initialization"),
+    make_option(c("--save_ideas_log"), action="store", dest="save_ideas_log", default=NULL, help="Flag to save IDEAS process log"),
+    make_option(c("--standardize_datasets"), action="store_true", dest="standardize_datasets", default=FALSE, help="Standardize all datasets"),
+    make_option(c("--thread"), action="store", dest="thread", type="integer", help="Process threads"),
+    make_option(c("--training_iterations"), action="store", dest="training_iterations", type="integer", default=NULL, help="Number of training iterations"),
+    make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training windows")
 )
 
 parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
@@ -286,14 +285,6 @@
     return(base_cmd);
 }
 
-get_file_path <- function(dir, fname) {
-    if (is.null(fname)) {
-        return(fname);
-    } else {
-        return(paste(dir, fname, sep="/"));
-    }
-}
-
 get_mean <- function(n) {
     N = NULL;
     for(i in sort(unique(n[,1]))) {
@@ -349,10 +340,6 @@
 }
 
 run_cmd <- function(cmd, save_ideas_log, output_log, output_dir) {
-    cat("save_ideas_log: ", save_ideas_log, "\n");
-    cat("output_log: ", output_log, "\n");
-    cat("output_dir: ", output_dir, "\n");
-    cat("\nRunning cmd:\n", cmd, "\n\n");
     rc = system(cmd);
     if (rc != 0) {
         if (is.null(save_ideas_log)) {
@@ -369,28 +356,18 @@
 } else {
     output_log = opt$output_log;
 }
-# Get full path of chromosomes.bed if not NULL.
-chrom_bed_input = get_file_path(opt$input_files_path, opt$chrom_bed_input);
-cat("chrom_bed_input: ", chrom_bed_input, "\n");
-# Get full path of chromosome_windows.txt if not NULL.
-chromosome_windows = get_file_path(opt$input_files_path, opt$chromosome_windows);
-cat("chromosome_windows: ", chromosome_windows, "\n");
-if (is.null(chromosome_windows)) {
+if (is.null(opt$chromosome_windows)) {
     windows_by_chrom = NULL;
 } else {
     # Read chromosome_windows.txt into memory.
-    windows_by_chrom = get_windows_by_chrom(chromosome_windows);
+    windows_by_chrom = get_windows_by_chrom(opt$chromosome_windows);
 }
-ideas_input_config = get_file_path(opt$input_files_path, opt$ideas_input_config);
-cat("ideas_input_config: ", ideas_input_config, "\n");
-base_cmd = get_base_cmd(ideas_input_config, chrom_bed_input, opt$training_iterations, opt$bychr, opt$hp,
+base_cmd = get_base_cmd(opt$ideas_input_config, opt$chrom_bed_input, opt$training_iterations, opt$bychr, opt$hp,
             opt$standardize_datasets, opt$log2, opt$max_states, opt$initial_states, opt$max_position_classes,
             opt$max_cell_type_clusters, opt$prior_concentration, opt$burnin_num, opt$mcmc_num, opt$minerr,
             opt$maxerr, opt$rseed, opt$thread);
-cat("base_cmd: ", base_cmd, "\n");
 output_base_name = opt$project_name;
-cat("output_base_name: ", output_base_name, "\n");
-
+# Perform analysis.
 if (is.null(opt$training_iterations)) {
     # Not performing training.
     if (is.null(windows_by_chrom)) {
@@ -415,7 +392,7 @@
         }
     }
 } else {
-    # performing training.
+    # Performing training.
     output_para0 = paste(output_base_name, "para0", sep=".");
     output_profile0 = paste(output_base_name, "profile0", sep=".");
     for (i in 1:opt$training_iterations) {