Mercurial > repos > greg > ideas
changeset 170:59ed3d424524 draft
Uploaded
author | greg |
---|---|
date | Thu, 25 Jan 2018 09:30:56 -0500 |
parents | 7b0c6c6cb82b |
children | 445f67ea18f6 |
files | ideas.R |
diffstat | 1 files changed, 40 insertions(+), 25 deletions(-) [+] |
line wrap: on
line diff
--- a/ideas.R Thu Jan 25 09:30:44 2018 -0500
+++ b/ideas.R Thu Jan 25 09:30:56 2018 -0500
@@ -6,8 +6,13 @@
 option_list <- list(
     make_option(c("--burnin_num"), action="store", dest="burnin_num", type="integer", help="Number of burnin steps"),
     make_option(c("--bychr"), action="store_true", dest="bychr", default=FALSE, help="Output chromosomes in separate files"),
+    make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", default=NULL, help="Chromosome windows positions file"),
+    make_option(c("--chromosome_windows"), action="store", dest="chromosome_windows", default=NULL, help="Windows positions by chroms config file"),
     make_option(c("--hp"), action="store_true", dest="hp", default=FALSE, help="Discourage state transition across chromosomes"),
     make_option(c("--initial_states"), action="store", dest="initial_states", type="integer", default=NULL, help="Initial number of states"),
+    make_option(c("--input"), action="store", dest="input", help="IdeasPre input dataset"),
+    make_option(c("--input_files_path"), action="store", dest="input_files_path", help="IdeasPre input dataset extra files path"),
+    make_option(c("--ideas_input_config"), action="store", dest="ideas_input_config", help="IDEAS_input_config file"),
     make_option(c("--log2"), action="store", dest="log2", type="double", default=NULL, help="log2 transformation"),
     make_option(c("--maxerr"), action="store", dest="maxerr", type="double", default=NULL, help="Maximum standard deviation for the emission Gaussian distribution"),
     make_option(c("--max_cell_type_clusters"), action="store", dest="max_cell_type_clusters", type="integer", default=NULL, help="Maximum number of cell type clusters allowed"),
@@ -15,20 +20,15 @@
     make_option(c("--max_states"), action="store", dest="max_states", type="double", default=NULL, help="Maximum number of states to be inferred"),
     make_option(c("--mcmc_num"), action="store", dest="mcmc_num", type="integer", help="Number of maximization steps"),
     make_option(c("--minerr"), action="store", dest="minerr", type="double", default=NULL, help="Minimum standard deviation for the emission Gaussian distribution"),
-    make_option(c("--norm"), action="store_true", dest="norm", default=FALSE, help="Standardize all datasets"),
     make_option(c("--output_log"), action="store", dest="output_log", default=NULL, help="Output log file path"),
-    make_option(c("--prep_output_config"), action="store", dest="prep_output_config", help="prepMat output config file"),
     make_option(c("--prior_concentration"), action="store", dest="prior_concentration", type="double", default=NULL, help="Prior concentration"),
     make_option(c("--project_name"), action="store", dest="project_name", help="Outputs will have this base name"),
     make_option(c("--rseed"), action="store", dest="rseed", type="integer", help="Seed for IDEAS model initialization"),
     make_option(c("--save_ideas_log"), action="store", dest="save_ideas_log", default=NULL, help="Flag to save IDEAS process log"),
-    make_option(c("--script_dir"), action="store", dest="script_dir", help="R script source directory"),
+    make_option(c("--standardize_datasets"), action="store_true", dest="standardize_datasets", default=FALSE, help="Standardize all datasets"),
     make_option(c("--thread"), action="store", dest="thread", type="integer", help="Process threads"),
-    make_option(c("--tmp_dir"), action="store", dest="tmp_dir", help="Directory of bed files"),
     make_option(c("--training_iterations"), action="store", dest="training_iterations", type="integer", default=NULL, help="Number of training iterations"),
-    make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training iterations"),
-    make_option(c("--windows_bed"), action="store", dest="windows_bed", default=NULL, help="Bed file containing bed windows"),
-    make_option(c("--windows_config"), action="store", dest="windows_config", default=NULL, help="Windows positions by chroms config")
+    make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training iterations")
 )
 
 parser <- OptionParser(usage="%prog [options] file", option_list=option_list)
@@ -240,12 +240,12 @@
     return(dd);
 }
 
-get_base_cmd <- function(prep_output_config, windows_bed, training_iterations, bychr, hp, norm, log2,
+get_base_cmd <- function(ideas_input_config, chrom_bed_input, training_iterations, bychr, hp, standardize_datasets, log2,
         max_states, initial_states, max_position_classes, max_cell_type_clusters, prior_concentration, burnin_num,
         mcmc_num, minerr, maxerr, rseed, thread) {
-    base_cmd = paste("ideas", prep_output_config, sep=" ");
-    if (!is.null(windows_bed)) {
-        base_cmd = paste(base_cmd, windows_bed, sep=" ");
+    base_cmd = paste("ideas", ideas_input_config, sep=" ");
+    if (!is.null(chrom_bed_input)) {
+        base_cmd = paste(base_cmd, chrom_bed_input, sep=" ");
     }
     if (!is.null(training_iterations)) {
         base_cmd = paste(base_cmd, "-impute none", sep=" ");
@@ -256,7 +256,7 @@
     if (hp) {
         base_cmd = paste(base_cmd, "-hp", sep=" ");
     }
-    if (norm) {
+    if (standardize_datasets) {
         base_cmd = paste(base_cmd, "-norm", sep=" ");
     }
     if (!is.null(log2)) {
@@ -289,6 +289,14 @@
     return(base_cmd);
 }
 
+get_file_path <- function(dir, fname) {
+    if (is.null(fname)) {
+        return(fname);
+    } else {
+        return(paste(dir, fname, sep="/"));
+    }
+}
+
 get_mean <- function(n) {
     N = NULL;
     for(i in sort(unique(n[,1]))) {
@@ -316,14 +324,10 @@
     return(base_cmd);
 }
 
-get_windows_by_chrom <- function(windows_config) {
-    if (is.null(windows_config)) {
-        windows_by_chrom = NULL;
-    } else {
-        fh = file(windows_config, "r");
-        windows_by_chrom = readLines(fh);
-        close(fh);
-    }
+get_windows_by_chrom <- function(chromosome_windows) {
+    fh = file(chromosome_windows, "r");
+    windows_by_chrom = readLines(fh);
+    close(fh);
     return(windows_by_chrom);
 }
 
@@ -357,12 +361,23 @@
     }
 }
 
+# Initialize values.
 default_log_name = "ideas_log.txt";
-windows_by_chrom = get_windows_by_chrom(opt$windows_config);
-base_cmd = get_base_cmd(opt$prep_output_config, opt$windows_bed, opt$training_iterations, opt$bychr,
-        opt$hp, opt$norm, opt$log2, opt$max_states, opt$initial_states, opt$max_position_classes,
-        opt$max_cell_type_clusters, opt$prior_concentration, opt$burnin_num, opt$mcmc_num, opt$minerr,
-        opt$maxerr, opt$rseed, opt$thread);
+# Get full path of chromosomes.bed if not NULL.
+chrom_bed_input = get_file_path(opt$input_files_path, opt$chrom_bed_input);
+# Get full path of chromosome_windows.txt if not NULL.
+chromosome_windows = get_file_path(opt$input_files_path, opt$chromosome_windows);
+if (is.null(chromosome_windows)) {
+    windows_by_chrom = NULL;
+} else {
+    # Read chromosome_windows.txt into memory.
+    windows_by_chrom = get_windows_by_chrom(chromosome_windows);
+}
+ideas_input_config = get_file_path(opt$input_files_path, opt$ideas_input_config);
+base_cmd = get_base_cmd(ideas_input_config, chrom_bed_input, opt$training_iterations, opt$bychr, opt$hp,
+        opt$standardize_datasets, opt$log2, opt$max_states, opt$initial_states, opt$max_position_classes,
+        opt$max_cell_type_clusters, opt$prior_concentration, opt$burnin_num, opt$mcmc_num, opt$minerr,
+        opt$maxerr, opt$rseed, opt$thread);
 output_base_name = opt$project_name;
 if (is.null(opt$training_iterations)) {