changeset 170:59ed3d424524 draft

Uploaded
author greg
date Thu, 25 Jan 2018 09:30:56 -0500
parents 7b0c6c6cb82b
children 445f67ea18f6
files ideas.R
diffstat 1 files changed, 40 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/ideas.R	Thu Jan 25 09:30:44 2018 -0500
+++ b/ideas.R	Thu Jan 25 09:30:56 2018 -0500
@@ -6,8 +6,13 @@
 option_list <- list(
     make_option(c("--burnin_num"), action="store", dest="burnin_num", type="integer", help="Number of burnin steps"),
     make_option(c("--bychr"), action="store_true", dest="bychr", default=FALSE, help="Output chromosomes in separate files"),
+    make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", default=NULL, help="Chromosome windows positions file"),
+    make_option(c("--chromosome_windows"), action="store", dest="chromosome_windows", default=NULL, help="Windows positions by chroms config file"),
     make_option(c("--hp"), action="store_true", dest="hp", default=FALSE, help="Discourage state transition across chromosomes"),
     make_option(c("--initial_states"), action="store", dest="initial_states", type="integer", default=NULL, help="Initial number of states"),
+    make_option(c("--input"), action="store", dest="input", help="IdeasPre input dataset"),
+    make_option(c("--input_files_path"), action="store", dest="input_files_path", help="IdeasPre input dataset extra files path"),
+    make_option(c("--ideas_input_config"), action="store", dest="ideas_input_config", help="IDEAS_input_config file"),
     make_option(c("--log2"), action="store", dest="log2", type="double", default=NULL, help="log2 transformation"),
     make_option(c("--maxerr"), action="store", dest="maxerr", type="double", default=NULL, help="Maximum standard deviation for the emission Gaussian distribution"),
     make_option(c("--max_cell_type_clusters"), action="store", dest="max_cell_type_clusters", type="integer", default=NULL, help="Maximum number of cell type clusters allowed"),
@@ -15,20 +20,15 @@
     make_option(c("--max_states"), action="store", dest="max_states", type="double", default=NULL, help="Maximum number of states to be inferred"),
     make_option(c("--mcmc_num"), action="store", dest="mcmc_num", type="integer", help="Number of maximization steps"),
     make_option(c("--minerr"), action="store", dest="minerr", type="double", default=NULL, help="Minimum standard deviation for the emission Gaussian distribution"),
-    make_option(c("--norm"), action="store_true", dest="norm", default=FALSE, help="Standardize all datasets"),
     make_option(c("--output_log"), action="store", dest="output_log", default=NULL, help="Output log file path"),
-    make_option(c("--prep_output_config"), action="store", dest="prep_output_config", help="prepMat output config file"),
     make_option(c("--prior_concentration"), action="store", dest="prior_concentration", type="double", default=NULL, help="Prior concentration"),
     make_option(c("--project_name"), action="store", dest="project_name", help="Outputs will have this base name"),
     make_option(c("--rseed"), action="store", dest="rseed", type="integer", help="Seed for IDEAS model initialization"),
     make_option(c("--save_ideas_log"), action="store", dest="save_ideas_log", default=NULL, help="Flag to save IDEAS process log"),
-    make_option(c("--script_dir"), action="store", dest="script_dir", help="R script source directory"),
+    make_option(c("--standardize_datasets"), action="store_true", dest="standardize_datasets", default=FALSE, help="Standardize all datasets"),
     make_option(c("--thread"), action="store", dest="thread", type="integer", help="Process threads"),
-    make_option(c("--tmp_dir"), action="store", dest="tmp_dir", help="Directory of bed files"),
     make_option(c("--training_iterations"), action="store", dest="training_iterations", type="integer", default=NULL, help="Number of training iterations"),
-    make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training iterations"),
-    make_option(c("--windows_bed"), action="store", dest="windows_bed", default=NULL, help="Bed file containing bed windows"),
-    make_option(c("--windows_config"), action="store", dest="windows_config", default=NULL, help="Windows positions by chroms config")
+    make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training windows")
 )
 
 parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
@@ -240,12 +240,12 @@
     return(dd);
 }
 
-get_base_cmd <- function(prep_output_config, windows_bed, training_iterations, bychr, hp, norm, log2,
+get_base_cmd <- function(ideas_input_config, chrom_bed_input, training_iterations, bychr, hp, standardize_datasets, log2,
         max_states, initial_states, max_position_classes, max_cell_type_clusters, prior_concentration,
         burnin_num, mcmc_num, minerr, maxerr, rseed, thread) {
-    base_cmd = paste("ideas", prep_output_config, sep=" ");
-    if (!is.null(windows_bed)) {
-        base_cmd = paste(base_cmd, windows_bed, sep=" ");
+    base_cmd = paste("ideas", ideas_input_config, sep=" ");
+    if (!is.null(chrom_bed_input)) {
+        base_cmd = paste(base_cmd, chrom_bed_input, sep=" ");
     }
     if (!is.null(training_iterations)) {
         base_cmd = paste(base_cmd, "-impute none", sep=" ");
@@ -256,7 +256,7 @@
     if (hp) {
         base_cmd = paste(base_cmd, "-hp", sep=" ");
     }
-    if (norm) {
+    if (standardize_datasets) {
         base_cmd = paste(base_cmd, "-norm", sep=" ");
     }
     if (!is.null(log2)) {
@@ -289,6 +289,14 @@
     return(base_cmd);
 }
 
+# Join dir and fname into one path; fname is returned unchanged when either part is NULL.
+get_file_path <- function(dir, fname) {
+    if (is.null(fname) || is.null(dir)) {  # optional file not supplied, or no base directory to prepend
+        return(fname);
+    }
+    return(file.path(dir, fname));  # file.path joins with "/" like the former paste(..., sep="/")
+}
+
 get_mean <- function(n) {
     N = NULL;
     for(i in sort(unique(n[,1]))) {
@@ -316,14 +324,10 @@
     return(base_cmd);
 }
 
-get_windows_by_chrom <- function(windows_config) {
-    if (is.null(windows_config)) {
-        windows_by_chrom = NULL;
-    } else {
-        fh = file(windows_config, "r");
-        windows_by_chrom = readLines(fh);
-        close(fh);
-    }
+# Return the lines of the chromosome_windows file as a character vector.
+get_windows_by_chrom <- function(chromosome_windows) {
+    # Given a path, readLines() opens and closes its own connection, even on error.
+    windows_by_chrom = readLines(chromosome_windows);
     return(windows_by_chrom);
 }
 
@@ -357,12 +361,23 @@
     }
 }
 
+# Initialize values: resolve the input file paths and build the base ideas command.
 default_log_name = "ideas_log.txt";
-windows_by_chrom = get_windows_by_chrom(opt$windows_config);
-base_cmd = get_base_cmd(opt$prep_output_config, opt$windows_bed, opt$training_iterations, opt$bychr,
-        opt$hp, opt$norm, opt$log2, opt$max_states, opt$initial_states, opt$max_position_classes,
-        opt$max_cell_type_clusters, opt$prior_concentration, opt$burnin_num, opt$mcmc_num, opt$minerr,
-        opt$maxerr, opt$rseed, opt$thread);
+# Resolve chrom_bed_input (chromosomes.bed) against input_files_path; stays NULL when the option is unset.
+chrom_bed_input = get_file_path(opt$input_files_path, opt$chrom_bed_input);
+# Resolve chromosome_windows (chromosome_windows.txt) against input_files_path; stays NULL when the option is unset.
+chromosome_windows = get_file_path(opt$input_files_path, opt$chromosome_windows);
+if (is.null(chromosome_windows)) {
+    windows_by_chrom = NULL;
+} else {
+    # Read the lines of chromosome_windows.txt into memory.
+    windows_by_chrom = get_windows_by_chrom(chromosome_windows);
+}
+ideas_input_config = get_file_path(opt$input_files_path, opt$ideas_input_config);
+base_cmd = get_base_cmd(ideas_input_config, chrom_bed_input, opt$training_iterations, opt$bychr, opt$hp,
+    opt$standardize_datasets, opt$log2, opt$max_states, opt$initial_states, opt$max_position_classes,
+    opt$max_cell_type_clusters, opt$prior_concentration, opt$burnin_num, opt$mcmc_num, opt$minerr,
+    opt$maxerr, opt$rseed, opt$thread);
 output_base_name = opt$project_name;
 
 if (is.null(opt$training_iterations)) {