Mercurial > repos > greg > ideas
changeset 175:2b6b5e2769f5 draft
Uploaded
author | greg |
---|---|
date | Thu, 25 Jan 2018 13:38:06 -0500 |
parents | b0ca3591242e |
children | e0d5669fee03 |
files | ideas.R |
diffstat | 1 files changed, 29 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
--- a/ideas.R Thu Jan 25 11:53:15 2018 -0500 +++ b/ideas.R Thu Jan 25 13:38:06 2018 -0500 @@ -4,32 +4,31 @@ suppressPackageStartupMessages(library("optparse")) option_list <- list( - make_option(c("--burnin_num"), action="store", dest="burnin_num", type="integer", help="Number of burnin steps"), - make_option(c("--bychr"), action="store_true", dest="bychr", default=FALSE, help="Output chromosomes in separate files"), - make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", default=NULL, help="Chromosome windows positions file"), - make_option(c("--chromosome_windows"), action="store", dest="chromosome_windows", default=NULL, help="Windows positions by chroms config file"), - make_option(c("--hp"), action="store_true", dest="hp", default=FALSE, help="Discourage state transition across chromosomes"), - make_option(c("--initial_states"), action="store", dest="initial_states", type="integer", default=NULL, help="Initial number of states"), - make_option(c("--input"), action="store", dest="input", help="IdeasPre input dataset"), - make_option(c("--input_files_path"), action="store", dest="input_files_path", help="IdeasPre input dataset extra files path"), - make_option(c("--ideas_input_config"), action="store", dest="ideas_input_config", help="IDEAS_input_config file"), - make_option(c("--log2"), action="store", dest="log2", type="double", default=NULL, help="log2 transformation"), - make_option(c("--maxerr"), action="store", dest="maxerr", type="double", default=NULL, help="Maximum standard deviation for the emission Gaussian distribution"), - make_option(c("--max_cell_type_clusters"), action="store", dest="max_cell_type_clusters", type="integer", default=NULL, help="Maximum number of cell type clusters allowed"), - make_option(c("--max_position_classes"), action="store", dest="max_position_classes", type="integer", default=NULL, help="Maximum number of position classes to be inferred"), - make_option(c("--max_states"), action="store", dest="max_states", type="double", default=NULL, help="Maximum number of states to be inferred"), - make_option(c("--mcmc_num"), action="store", dest="mcmc_num", type="integer", help="Number of maximization steps"), - make_option(c("--minerr"), action="store", dest="minerr", type="double", default=NULL, help="Minimum standard deviation for the emission Gaussian distribution"), - make_option(c("--output_dir"), action="store", dest="output_dir", help="Output directory, used only if job ends in error and process log needs saving"), - make_option(c("--output_log"), action="store", dest="output_log", default=NULL, help="Output log file path"), - make_option(c("--prior_concentration"), action="store", dest="prior_concentration", type="double", default=NULL, help="Prior concentration"), - make_option(c("--project_name"), action="store", dest="project_name", help="Outputs will have this base name"), - make_option(c("--rseed"), action="store", dest="rseed", type="integer", help="Seed for IDEAS model initialization"), - make_option(c("--save_ideas_log"), action="store", dest="save_ideas_log", default=NULL, help="Flag to save IDEAS process log"), - make_option(c("--standardize_datasets"), action="store_true", dest="standardize_datasets", default=FALSE, help="Standardize all datasets"), - make_option(c("--thread"), action="store", dest="thread", type="integer", help="Process threads"), - make_option(c("--training_iterations"), action="store", dest="training_iterations", type="integer", default=NULL, help="Number of training iterations"), - make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training iterations") + make_option(c("--burnin_num"), action="store", dest="burnin_num", type="integer", help="Number of burnin steps"), + make_option(c("--bychr"), action="store_true", dest="bychr", default=FALSE, help="Output chromosomes in separate files"), + make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", default=NULL, help="Chromosome windows positions file"), + make_option(c("--chromosome_windows"), action="store", dest="chromosome_windows", default=NULL, help="Windows positions by chroms config file"), + make_option(c("--hp"), action="store_true", dest="hp", default=FALSE, help="Discourage state transition across chromosomes"), + make_option(c("--initial_states"), action="store", dest="initial_states", type="integer", default=NULL, help="Initial number of states"), + make_option(c("--input_files_path"), action="store", dest="input_files_path", help="IdeasPre input dataset extra files path"), + make_option(c("--ideas_input_config"), action="store", dest="ideas_input_config", help="IDEAS_input_config file"), + make_option(c("--log2"), action="store", dest="log2", type="double", default=NULL, help="log2 transformation"), + make_option(c("--maxerr"), action="store", dest="maxerr", type="double", default=NULL, help="Maximum standard deviation for the emission Gaussian distribution"), + make_option(c("--max_cell_type_clusters"), action="store", dest="max_cell_type_clusters", type="integer", default=NULL, help="Maximum number of cell type clusters allowed"), + make_option(c("--max_position_classes"), action="store", dest="max_position_classes", type="integer", default=NULL, help="Maximum number of position classes to be inferred"), + make_option(c("--max_states"), action="store", dest="max_states", type="double", default=NULL, help="Maximum number of states to be inferred"), + make_option(c("--mcmc_num"), action="store", dest="mcmc_num", type="integer", help="Number of maximization steps"), + make_option(c("--minerr"), action="store", dest="minerr", type="double", default=NULL, help="Minimum standard deviation for the emission Gaussian distribution"), + make_option(c("--output_dir"), action="store", dest="output_dir", help="Output directory, used only if job ends in error and process log needs saving"), + make_option(c("--output_log"), action="store", dest="output_log", default=NULL, help="Output log file path"), + make_option(c("--prior_concentration"), action="store", dest="prior_concentration", type="double", default=NULL, help="Prior concentration"), + make_option(c("--project_name"), action="store", dest="project_name", help="Outputs will have this base name"), + make_option(c("--rseed"), action="store", dest="rseed", type="integer", help="Seed for IDEAS model initialization"), + make_option(c("--save_ideas_log"), action="store", dest="save_ideas_log", default=NULL, help="Flag to save IDEAS process log"), + make_option(c("--standardize_datasets"), action="store_true", dest="standardize_datasets", default=FALSE, help="Standardize all datasets"), + make_option(c("--thread"), action="store", dest="thread", type="integer", help="Process threads"), + make_option(c("--training_iterations"), action="store", dest="training_iterations", type="integer", default=NULL, help="Number of training iterations"), + make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training iterations") ) parser <- OptionParser(usage="%prog [options] file", option_list=option_list) @@ -286,14 +285,6 @@ return(base_cmd); } -get_file_path <- function(dir, fname) { - if (is.null(fname)) { - return(fname); - } else { - return(paste(dir, fname, sep="/")); - } -} - get_mean <- function(n) { N = NULL; for(i in sort(unique(n[,1]))) { @@ -349,10 +340,6 @@ } run_cmd <- function(cmd, save_ideas_log, output_log, output_dir) { - cat("save_ideas_log: ", save_ideas_log, "\n"); - cat("output_log: ", output_log, "\n"); - cat("output_dir: ", output_dir, "\n"); - cat("\nRunning cmd:\n", cmd, "\n\n"); rc = system(cmd); if (rc != 0) { if (is.null(save_ideas_log)) { @@ -369,28 +356,18 @@ } else { output_log = opt$output_log; } -# Get full path of chromosomes.bed if not NULL. -chrom_bed_input = get_file_path(opt$input_files_path, opt$chrom_bed_input); -cat("chrom_bed_input: ", chrom_bed_input, "\n"); -# Get full path of chromosome_windows.txt if not NULL. -chromosome_windows = get_file_path(opt$input_files_path, opt$chromosome_windows); -cat("chromosome_windows: ", chromosome_windows, "\n"); -if (is.null(chromosome_windows)) { +if (is.null(opt$chromosome_windows)) { windows_by_chrom = NULL; } else { # Read chromosome_windows.txt into memory. windows_by_chrom = get_windows_by_chrom(chromosome_windows); } -ideas_input_config = get_file_path(opt$input_files_path, opt$ideas_input_config); -cat("ideas_input_config: ", ideas_input_config, "\n"); -base_cmd = get_base_cmd(ideas_input_config, chrom_bed_input, opt$training_iterations, opt$bychr, opt$hp, +base_cmd = get_base_cmd(opt$ideas_input_config, opt$chrom_bed_input, opt$training_iterations, opt$bychr, opt$hp, opt$standardize_datasets, opt$log2, opt$max_states, opt$initial_states, opt$max_position_classes, opt$max_cell_type_clusters, opt$prior_concentration, opt$burnin_num, opt$mcmc_num, opt$minerr, opt$maxerr, opt$rseed, opt$thread); -cat("base_cmd: ", base_cmd, "\n"); output_base_name = opt$project_name; -cat("output_base_name: ", output_base_name, "\n"); - +# Perform analysis. if (is.null(opt$training_iterations)) { # Not performing training. if (is.null(windows_by_chrom)) { @@ -415,7 +392,7 @@ } } } else { - # performing training. + # Performing training. output_para0 = paste(output_base_name, "para0", sep="."); output_profile0 = paste(output_base_name, "profile0", sep="."); for (i in 1:opt$training_iterations) {