Mercurial > repos > greg > ideas
comparison ideas.R @ 170:59ed3d424524 draft
Uploaded
author | greg |
---|---|
date | Thu, 25 Jan 2018 09:30:56 -0500 |
parents | c5b77e9b36f1 |
children | b0ca3591242e |
comparison
equal
deleted
inserted
replaced
169:7b0c6c6cb82b | 170:59ed3d424524 |
---|---|
4 suppressPackageStartupMessages(library("optparse")) | 4 suppressPackageStartupMessages(library("optparse")) |
5 | 5 |
6 option_list <- list( | 6 option_list <- list( |
7 make_option(c("--burnin_num"), action="store", dest="burnin_num", type="integer", help="Number of burnin steps"), | 7 make_option(c("--burnin_num"), action="store", dest="burnin_num", type="integer", help="Number of burnin steps"), |
8 make_option(c("--bychr"), action="store_true", dest="bychr", default=FALSE, help="Output chromosomes in separate files"), | 8 make_option(c("--bychr"), action="store_true", dest="bychr", default=FALSE, help="Output chromosomes in separate files"), |
9 make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", default=NULL, help="Chromosome windows positions file"), | |
10 make_option(c("--chromosome_windows"), action="store", dest="chromosome_windows", default=NULL, help="Windows positions by chroms config file"), | |
9 make_option(c("--hp"), action="store_true", dest="hp", default=FALSE, help="Discourage state transition across chromosomes"), | 11 make_option(c("--hp"), action="store_true", dest="hp", default=FALSE, help="Discourage state transition across chromosomes"), |
10 make_option(c("--initial_states"), action="store", dest="initial_states", type="integer", default=NULL, help="Initial number of states"), | 12 make_option(c("--initial_states"), action="store", dest="initial_states", type="integer", default=NULL, help="Initial number of states"), |
13 make_option(c("--input"), action="store", dest="input", help="IdeasPre input dataset"), | |
14 make_option(c("--input_files_path"), action="store", dest="input_files_path", help="IdeasPre input dataset extra files path"), | |
15 make_option(c("--ideas_input_config"), action="store", dest="ideas_input_config", help="IDEAS_input_config file"), | |
11 make_option(c("--log2"), action="store", dest="log2", type="double", default=NULL, help="log2 transformation"), | 16 make_option(c("--log2"), action="store", dest="log2", type="double", default=NULL, help="log2 transformation"), |
12 make_option(c("--maxerr"), action="store", dest="maxerr", type="double", default=NULL, help="Maximum standard deviation for the emission Gaussian distribution"), | 17 make_option(c("--maxerr"), action="store", dest="maxerr", type="double", default=NULL, help="Maximum standard deviation for the emission Gaussian distribution"), |
13 make_option(c("--max_cell_type_clusters"), action="store", dest="max_cell_type_clusters", type="integer", default=NULL, help="Maximum number of cell type clusters allowed"), | 18 make_option(c("--max_cell_type_clusters"), action="store", dest="max_cell_type_clusters", type="integer", default=NULL, help="Maximum number of cell type clusters allowed"), |
14 make_option(c("--max_position_classes"), action="store", dest="max_position_classes", type="integer", default=NULL, help="Maximum number of position classes to be inferred"), | 19 make_option(c("--max_position_classes"), action="store", dest="max_position_classes", type="integer", default=NULL, help="Maximum number of position classes to be inferred"), |
15 make_option(c("--max_states"), action="store", dest="max_states", type="double", default=NULL, help="Maximum number of states to be inferred"), | 20 make_option(c("--max_states"), action="store", dest="max_states", type="double", default=NULL, help="Maximum number of states to be inferred"), |
16 make_option(c("--mcmc_num"), action="store", dest="mcmc_num", type="integer", help="Number of maximization steps"), | 21 make_option(c("--mcmc_num"), action="store", dest="mcmc_num", type="integer", help="Number of maximization steps"), |
17 make_option(c("--minerr"), action="store", dest="minerr", type="double", default=NULL, help="Minimum standard deviation for the emission Gaussian distribution"), | 22 make_option(c("--minerr"), action="store", dest="minerr", type="double", default=NULL, help="Minimum standard deviation for the emission Gaussian distribution"), |
18 make_option(c("--norm"), action="store_true", dest="norm", default=FALSE, help="Standardize all datasets"), | |
19 make_option(c("--output_log"), action="store", dest="output_log", default=NULL, help="Output log file path"), | 23 make_option(c("--output_log"), action="store", dest="output_log", default=NULL, help="Output log file path"), |
20 make_option(c("--prep_output_config"), action="store", dest="prep_output_config", help="prepMat output config file"), | |
21 make_option(c("--prior_concentration"), action="store", dest="prior_concentration", type="double", default=NULL, help="Prior concentration"), | 24 make_option(c("--prior_concentration"), action="store", dest="prior_concentration", type="double", default=NULL, help="Prior concentration"), |
22 make_option(c("--project_name"), action="store", dest="project_name", help="Outputs will have this base name"), | 25 make_option(c("--project_name"), action="store", dest="project_name", help="Outputs will have this base name"), |
23 make_option(c("--rseed"), action="store", dest="rseed", type="integer", help="Seed for IDEAS model initialization"), | 26 make_option(c("--rseed"), action="store", dest="rseed", type="integer", help="Seed for IDEAS model initialization"), |
24 make_option(c("--save_ideas_log"), action="store", dest="save_ideas_log", default=NULL, help="Flag to save IDEAS process log"), | 27 make_option(c("--save_ideas_log"), action="store", dest="save_ideas_log", default=NULL, help="Flag to save IDEAS process log"), |
25 make_option(c("--script_dir"), action="store", dest="script_dir", help="R script source directory"), | 28 make_option(c("--standardize_datasets"), action="store_true", dest="standardize_datasets", default=FALSE, help="Standardize all datasets"), |
26 make_option(c("--thread"), action="store", dest="thread", type="integer", help="Process threads"), | 29 make_option(c("--thread"), action="store", dest="thread", type="integer", help="Process threads"), |
27 make_option(c("--tmp_dir"), action="store", dest="tmp_dir", help="Directory of bed files"), | |
28 make_option(c("--training_iterations"), action="store", dest="training_iterations", type="integer", default=NULL, help="Number of training iterations"), | 30 make_option(c("--training_iterations"), action="store", dest="training_iterations", type="integer", default=NULL, help="Number of training iterations"), |
29 make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training iterations"), | 31 make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training iterations") |
30 make_option(c("--windows_bed"), action="store", dest="windows_bed", default=NULL, help="Bed file containing bed windows"), | |
31 make_option(c("--windows_config"), action="store", dest="windows_config", default=NULL, help="Windows positions by chroms config") | |
32 ) | 32 ) |
33 | 33 |
34 parser <- OptionParser(usage="%prog [options] file", option_list=option_list) | 34 parser <- OptionParser(usage="%prog [options] file", option_list=option_list) |
35 args <- parse_args(parser, positional_arguments=TRUE) | 35 args <- parse_args(parser, positional_arguments=TRUE) |
36 opt <- args$options | 36 opt <- args$options |
238 dd[i+dim(NN)[1]] = min(apply(array(NN[,1:p], dim=c(dim(NN)[1],p)), 1, function(x){sqrt(sum((x-MM[i,1:p])^2))})); | 238 dd[i+dim(NN)[1]] = min(apply(array(NN[,1:p], dim=c(dim(NN)[1],p)), 1, function(x){sqrt(sum((x-MM[i,1:p])^2))})); |
239 } | 239 } |
240 return(dd); | 240 return(dd); |
241 } | 241 } |
242 | 242 |
243 get_base_cmd <- function(prep_output_config, windows_bed, training_iterations, bychr, hp, norm, log2, | 243 get_base_cmd <- function(ideas_input_config, chrom_bed_input, training_iterations, bychr, hp, standardize_datasets, log2, |
244 max_states, initial_states, max_position_classes, max_cell_type_clusters, prior_concentration, | 244 max_states, initial_states, max_position_classes, max_cell_type_clusters, prior_concentration, |
245 burnin_num, mcmc_num, minerr, maxerr, rseed, thread) { | 245 burnin_num, mcmc_num, minerr, maxerr, rseed, thread) { |
246 base_cmd = paste("ideas", prep_output_config, sep=" "); | 246 base_cmd = paste("ideas", ideas_input_config, sep=" "); |
247 if (!is.null(windows_bed)) { | 247 if (!is.null(chrom_bed_input)) { |
248 base_cmd = paste(base_cmd, windows_bed, sep=" "); | 248 base_cmd = paste(base_cmd, chrom_bed_input, sep=" "); |
249 } | 249 } |
250 if (!is.null(training_iterations)) { | 250 if (!is.null(training_iterations)) { |
251 base_cmd = paste(base_cmd, "-impute none", sep=" "); | 251 base_cmd = paste(base_cmd, "-impute none", sep=" "); |
252 } | 252 } |
253 if (bychr) { | 253 if (bychr) { |
254 base_cmd = paste(base_cmd, "-bychr", sep=" "); | 254 base_cmd = paste(base_cmd, "-bychr", sep=" "); |
255 } | 255 } |
256 if (hp) { | 256 if (hp) { |
257 base_cmd = paste(base_cmd, "-hp", sep=" "); | 257 base_cmd = paste(base_cmd, "-hp", sep=" "); |
258 } | 258 } |
259 if (norm) { | 259 if (standardize_datasets) { |
260 base_cmd = paste(base_cmd, "-norm", sep=" "); | 260 base_cmd = paste(base_cmd, "-norm", sep=" "); |
261 } | 261 } |
262 if (!is.null(log2)) { | 262 if (!is.null(log2)) { |
263 base_cmd = paste(base_cmd, "-log2", log2, sep=" "); | 263 base_cmd = paste(base_cmd, "-log2", log2, sep=" "); |
264 } | 264 } |
285 base_cmd = paste(base_cmd, "-maxerr", maxerr, sep=" "); | 285 base_cmd = paste(base_cmd, "-maxerr", maxerr, sep=" "); |
286 } | 286 } |
287 base_cmd = paste(base_cmd, "-rseed", rseed, sep=" "); | 287 base_cmd = paste(base_cmd, "-rseed", rseed, sep=" "); |
288 base_cmd = paste(base_cmd, "-thread", thread, sep=" "); | 288 base_cmd = paste(base_cmd, "-thread", thread, sep=" "); |
289 return(base_cmd); | 289 return(base_cmd); |
290 } | |
291 | |
292 get_file_path <- function(dir, fname) { | |
293 if (is.null(fname)) { | |
294 return(fname); | |
295 } else { | |
296 return(paste(dir, fname, sep="/")); | |
297 } | |
290 } | 298 } |
291 | 299 |
292 get_mean <- function(n) { | 300 get_mean <- function(n) { |
293 N = NULL; | 301 N = NULL; |
294 for(i in sort(unique(n[,1]))) { | 302 for(i in sort(unique(n[,1]))) { |
314 } | 322 } |
315 base_cmd = paste(base_cmd_items, collapse=" "); | 323 base_cmd = paste(base_cmd_items, collapse=" "); |
316 return(base_cmd); | 324 return(base_cmd); |
317 } | 325 } |
318 | 326 |
319 get_windows_by_chrom <- function(windows_config) { | 327 get_windows_by_chrom <- function(chromosome_windows) { |
320 if (is.null(windows_config)) { | 328 fh = file(chromosome_windows, "r"); |
321 windows_by_chrom = NULL; | 329 windows_by_chrom = readLines(fh); |
322 } else { | 330 close(fh); |
323 fh = file(windows_config, "r"); | |
324 windows_by_chrom = readLines(fh); | |
325 close(fh); | |
326 } | |
327 return(windows_by_chrom); | 331 return(windows_by_chrom); |
328 } | 332 } |
329 | 333 |
330 make_parameter <- function(myorder, id, mem, mycut, para) { | 334 make_parameter <- function(myorder, id, mem, mycut, para) { |
331 rt = NULL; | 335 rt = NULL; |
355 } | 359 } |
356 quit(save="no", status=rc); | 360 quit(save="no", status=rc); |
357 } | 361 } |
358 } | 362 } |
359 | 363 |
364 # Initialize values. | |
360 default_log_name = "ideas_log.txt"; | 365 default_log_name = "ideas_log.txt"; |
361 windows_by_chrom = get_windows_by_chrom(opt$windows_config); | 366 # Get full path of chromosomes.bed if not NULL. |
362 base_cmd = get_base_cmd(opt$prep_output_config, opt$windows_bed, opt$training_iterations, opt$bychr, | 367 chrom_bed_input = get_file_path(opt$input_files_path, opt$chrom_bed_input); |
363 opt$hp, opt$norm, opt$log2, opt$max_states, opt$initial_states, opt$max_position_classes, | 368 # Get full path of chromosome_windows.txt if not NULL. |
364 opt$max_cell_type_clusters, opt$prior_concentration, opt$burnin_num, opt$mcmc_num, opt$minerr, | 369 chromosome_windows = get_file_path(opt$input_files_path, opt$chromosome_windows); |
365 opt$maxerr, opt$rseed, opt$thread); | 370 if (is.null(chromosome_windows)) { |
371 windows_by_chrom = NULL; | |
372 } else { | |
373 # Read chromosome_windows.txt into memory. | |
374 windows_by_chrom = get_windows_by_chrom(chromosome_windows); | |
375 } | |
376 ideas_input_config = get_file_path(opt$input_files_path, opt$ideas_input_config); | |
377 base_cmd = get_base_cmd(ideas_input_config, chrom_bed_input, opt$training_iterations, opt$bychr, opt$hp, | |
378 opt$standardize_datasets, opt$log2, opt$max_states, opt$initial_states, opt$max_position_classes, | |
379 opt$max_cell_type_clusters, opt$prior_concentration, opt$burnin_num, opt$mcmc_num, opt$minerr, | |
380 opt$maxerr, opt$rseed, opt$thread); | |
366 output_base_name = opt$project_name; | 381 output_base_name = opt$project_name; |
367 | 382 |
368 if (is.null(opt$training_iterations)) { | 383 if (is.null(opt$training_iterations)) { |
369 # Not performing training. | 384 # Not performing training. |
370 if (is.null(windows_by_chrom)) { | 385 if (is.null(windows_by_chrom)) { |