Mercurial > repos > greg > ideas
comparison ideas.R @ 175:2b6b5e2769f5 draft
Uploaded
author | greg |
---|---|
date | Thu, 25 Jan 2018 13:38:06 -0500 |
parents | b0ca3591242e |
children | f32f15562a82 |
comparison
equal
deleted
inserted
replaced
174:b0ca3591242e | 175:2b6b5e2769f5 |
---|---|
2 | 2 |
3 suppressPackageStartupMessages(library("data.table")) | 3 suppressPackageStartupMessages(library("data.table")) |
4 suppressPackageStartupMessages(library("optparse")) | 4 suppressPackageStartupMessages(library("optparse")) |
5 | 5 |
6 option_list <- list( | 6 option_list <- list( |
7 make_option(c("--burnin_num"), action="store", dest="burnin_num", type="integer", help="Number of burnin steps"), | 7 make_option(c("--burnin_num"), action="store", dest="burnin_num", type="integer", help="Number of burnin steps"), |
8 make_option(c("--bychr"), action="store_true", dest="bychr", default=FALSE, help="Output chromosomes in separate files"), | 8 make_option(c("--bychr"), action="store_true", dest="bychr", default=FALSE, help="Output chromosomes in separate files"), |
9 make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", default=NULL, help="Chromosome windows positions file"), | 9 make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", default=NULL, help="Chromosome windows positions file"), |
10 make_option(c("--chromosome_windows"), action="store", dest="chromosome_windows", default=NULL, help="Windows positions by chroms config file"), | 10 make_option(c("--chromosome_windows"), action="store", dest="chromosome_windows", default=NULL, help="Windows positions by chroms config file"), |
11 make_option(c("--hp"), action="store_true", dest="hp", default=FALSE, help="Discourage state transition across chromosomes"), | 11 make_option(c("--hp"), action="store_true", dest="hp", default=FALSE, help="Discourage state transition across chromosomes"), |
12 make_option(c("--initial_states"), action="store", dest="initial_states", type="integer", default=NULL, help="Initial number of states"), | 12 make_option(c("--initial_states"), action="store", dest="initial_states", type="integer", default=NULL, help="Initial number of states"), |
13 make_option(c("--input"), action="store", dest="input", help="IdeasPre input dataset"), | 13 make_option(c("--input_files_path"), action="store", dest="input_files_path", help="IdeasPre input dataset extra files path"), |
14 make_option(c("--input_files_path"), action="store", dest="input_files_path", help="IdeasPre input dataset extra files path"), | 14 make_option(c("--ideas_input_config"), action="store", dest="ideas_input_config", help="IDEAS_input_config file"), |
15 make_option(c("--ideas_input_config"), action="store", dest="ideas_input_config", help="IDEAS_input_config file"), | 15 make_option(c("--log2"), action="store", dest="log2", type="double", default=NULL, help="log2 transformation"), |
16 make_option(c("--log2"), action="store", dest="log2", type="double", default=NULL, help="log2 transformation"), | 16 make_option(c("--maxerr"), action="store", dest="maxerr", type="double", default=NULL, help="Maximum standard deviation for the emission Gaussian distribution"), |
17 make_option(c("--maxerr"), action="store", dest="maxerr", type="double", default=NULL, help="Maximum standard deviation for the emission Gaussian distribution"), | 17 make_option(c("--max_cell_type_clusters"), action="store", dest="max_cell_type_clusters", type="integer", default=NULL, help="Maximum number of cell type clusters allowed"), |
18 make_option(c("--max_cell_type_clusters"), action="store", dest="max_cell_type_clusters", type="integer", default=NULL, help="Maximum number of cell type clusters allowed"), | 18 make_option(c("--max_position_classes"), action="store", dest="max_position_classes", type="integer", default=NULL, help="Maximum number of position classes to be inferred"), |
19 make_option(c("--max_position_classes"), action="store", dest="max_position_classes", type="integer", default=NULL, help="Maximum number of position classes to be inferred"), | 19 make_option(c("--max_states"), action="store", dest="max_states", type="double", default=NULL, help="Maximum number of states to be inferred"), |
20 make_option(c("--max_states"), action="store", dest="max_states", type="double", default=NULL, help="Maximum number of states to be inferred"), | 20 make_option(c("--mcmc_num"), action="store", dest="mcmc_num", type="integer", help="Number of maximization steps"), |
21 make_option(c("--mcmc_num"), action="store", dest="mcmc_num", type="integer", help="Number of maximization steps"), | 21 make_option(c("--minerr"), action="store", dest="minerr", type="double", default=NULL, help="Minimum standard deviation for the emission Gaussian distribution"), |
22 make_option(c("--minerr"), action="store", dest="minerr", type="double", default=NULL, help="Minimum standard deviation for the emission Gaussian distribution"), | 22 make_option(c("--output_dir"), action="store", dest="output_dir", help="Output directory, used only if job ends in error and process log needs saving"), |
23 make_option(c("--output_dir"), action="store", dest="output_dir", help="Output directory, used only if job ends in error and process log needs saving"), | 23 make_option(c("--output_log"), action="store", dest="output_log", default=NULL, help="Output log file path"), |
24 make_option(c("--output_log"), action="store", dest="output_log", default=NULL, help="Output log file path"), | 24 make_option(c("--prior_concentration"), action="store", dest="prior_concentration", type="double", default=NULL, help="Prior concentration"), |
25 make_option(c("--prior_concentration"), action="store", dest="prior_concentration", type="double", default=NULL, help="Prior concentration"), | 25 make_option(c("--project_name"), action="store", dest="project_name", help="Outputs will have this base name"), |
26 make_option(c("--project_name"), action="store", dest="project_name", help="Outputs will have this base name"), | 26 make_option(c("--rseed"), action="store", dest="rseed", type="integer", help="Seed for IDEAS model initialization"), |
27 make_option(c("--rseed"), action="store", dest="rseed", type="integer", help="Seed for IDEAS model initialization"), | 27 make_option(c("--save_ideas_log"), action="store", dest="save_ideas_log", default=NULL, help="Flag to save IDEAS process log"), |
28 make_option(c("--save_ideas_log"), action="store", dest="save_ideas_log", default=NULL, help="Flag to save IDEAS process log"), | 28 make_option(c("--standardize_datasets"), action="store_true", dest="standardize_datasets", default=FALSE, help="Standardize all datasets"), |
29 make_option(c("--standardize_datasets"), action="store_true", dest="standardize_datasets", default=FALSE, help="Standardize all datasets"), | 29 make_option(c("--thread"), action="store", dest="thread", type="integer", help="Process threads"), |
30 make_option(c("--thread"), action="store", dest="thread", type="integer", help="Process threads"), | 30 make_option(c("--training_iterations"), action="store", dest="training_iterations", type="integer", default=NULL, help="Number of training iterations"), |
31 make_option(c("--training_iterations"), action="store", dest="training_iterations", type="integer", default=NULL, help="Number of training iterations"), | 31 make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training iterations") |
32 make_option(c("--training_windows"), action="store", dest="training_windows", type="integer", default=NULL, help="Number of training iterations") | |
33 ) | 32 ) |
34 | 33 |
35 parser <- OptionParser(usage="%prog [options] file", option_list=option_list) | 34 parser <- OptionParser(usage="%prog [options] file", option_list=option_list) |
36 args <- parse_args(parser, positional_arguments=TRUE) | 35 args <- parse_args(parser, positional_arguments=TRUE) |
37 opt <- args$options | 36 opt <- args$options |
284 base_cmd = paste(base_cmd, "-rseed", rseed, sep=" "); | 283 base_cmd = paste(base_cmd, "-rseed", rseed, sep=" "); |
285 base_cmd = paste(base_cmd, "-thread", thread, sep=" "); | 284 base_cmd = paste(base_cmd, "-thread", thread, sep=" "); |
286 return(base_cmd); | 285 return(base_cmd); |
287 } | 286 } |
288 | 287 |
289 get_file_path <- function(dir, fname) { | |
290 if (is.null(fname)) { | |
291 return(fname); | |
292 } else { | |
293 return(paste(dir, fname, sep="/")); | |
294 } | |
295 } | |
296 | |
297 get_mean <- function(n) { | 288 get_mean <- function(n) { |
298 N = NULL; | 289 N = NULL; |
299 for(i in sort(unique(n[,1]))) { | 290 for(i in sort(unique(n[,1]))) { |
300 t = which(n[,1]==i); | 291 t = which(n[,1]==i); |
301 N = rbind(N, apply(array(n[t,], dim=c(length(t), dim(n)[2])), 2, sum)[-1]); | 292 N = rbind(N, apply(array(n[t,], dim=c(length(t), dim(n)[2])), 2, sum)[-1]); |
347 unlink(f); | 338 unlink(f); |
348 } | 339 } |
349 } | 340 } |
350 | 341 |
351 run_cmd <- function(cmd, save_ideas_log, output_log, output_dir) { | 342 run_cmd <- function(cmd, save_ideas_log, output_log, output_dir) { |
352 cat("save_ideas_log: ", save_ideas_log, "\n"); | |
353 cat("output_log: ", output_log, "\n"); | |
354 cat("output_dir: ", output_dir, "\n"); | |
355 cat("\nRunning cmd:\n", cmd, "\n\n"); | |
356 rc = system(cmd); | 343 rc = system(cmd); |
357 if (rc != 0) { | 344 if (rc != 0) { |
358 if (is.null(save_ideas_log)) { | 345 if (is.null(save_ideas_log)) { |
359 to_path = paste(output_dir, output_log, sep="/"); | 346 to_path = paste(output_dir, output_log, sep="/"); |
360 file.rename(output_log, to_path); | 347 file.rename(output_log, to_path); |
367 if (is.null(opt$save_ideas_log)) { | 354 if (is.null(opt$save_ideas_log)) { |
368 output_log = "ideas_log.txt"; | 355 output_log = "ideas_log.txt"; |
369 } else { | 356 } else { |
370 output_log = opt$output_log; | 357 output_log = opt$output_log; |
371 } | 358 } |
372 # Get full path of chromosomes.bed if not NULL. | 359 if (is.null(opt$chromosome_windows)) { |
373 chrom_bed_input = get_file_path(opt$input_files_path, opt$chrom_bed_input); | |
374 cat("chrom_bed_input: ", chrom_bed_input, "\n"); | |
375 # Get full path of chromosome_windows.txt if not NULL. | |
376 chromosome_windows = get_file_path(opt$input_files_path, opt$chromosome_windows); | |
377 cat("chromosome_windows: ", chromosome_windows, "\n"); | |
378 if (is.null(chromosome_windows)) { | |
379 windows_by_chrom = NULL; | 360 windows_by_chrom = NULL; |
380 } else { | 361 } else { |
381 # Read chromosome_windows.txt into memory. | 362 # Read chromosome_windows.txt into memory. |
382 windows_by_chrom = get_windows_by_chrom(chromosome_windows); | 363 windows_by_chrom = get_windows_by_chrom(chromosome_windows); |
383 } | 364 } |
384 ideas_input_config = get_file_path(opt$input_files_path, opt$ideas_input_config); | 365 base_cmd = get_base_cmd(opt$ideas_input_config, opt$chrom_bed_input, opt$training_iterations, opt$bychr, opt$hp, |
385 cat("ideas_input_config: ", ideas_input_config, "\n"); | |
386 base_cmd = get_base_cmd(ideas_input_config, chrom_bed_input, opt$training_iterations, opt$bychr, opt$hp, | |
387 opt$standardize_datasets, opt$log2, opt$max_states, opt$initial_states, opt$max_position_classes, | 366 opt$standardize_datasets, opt$log2, opt$max_states, opt$initial_states, opt$max_position_classes, |
388 opt$max_cell_type_clusters, opt$prior_concentration, opt$burnin_num, opt$mcmc_num, opt$minerr, | 367 opt$max_cell_type_clusters, opt$prior_concentration, opt$burnin_num, opt$mcmc_num, opt$minerr, |
389 opt$maxerr, opt$rseed, opt$thread); | 368 opt$maxerr, opt$rseed, opt$thread); |
390 cat("base_cmd: ", base_cmd, "\n"); | |
391 output_base_name = opt$project_name; | 369 output_base_name = opt$project_name; |
392 cat("output_base_name: ", output_base_name, "\n"); | 370 # Perform analysis. |
393 | |
394 if (is.null(opt$training_iterations)) { | 371 if (is.null(opt$training_iterations)) { |
395 # Not performing training. | 372 # Not performing training. |
396 if (is.null(windows_by_chrom)) { | 373 if (is.null(windows_by_chrom)) { |
397 # Not performing windows by chromosome. | 374 # Not performing windows by chromosome. |
398 output_name = output_base_name; | 375 output_name = output_base_name; |
413 cmd = add_output_redirect(cmd, output_log); | 390 cmd = add_output_redirect(cmd, output_log); |
414 run_cmd(cmd, opt$save_ideas_log, output_log, opt$output_dir); | 391 run_cmd(cmd, opt$save_ideas_log, output_log, opt$output_dir); |
415 } | 392 } |
416 } | 393 } |
417 } else { | 394 } else { |
418 # performing training. | 395 # Performing training. |
419 output_para0 = paste(output_base_name, "para0", sep="."); | 396 output_para0 = paste(output_base_name, "para0", sep="."); |
420 output_profile0 = paste(output_base_name, "profile0", sep="."); | 397 output_profile0 = paste(output_base_name, "profile0", sep="."); |
421 for (i in 1:opt$training_iterations) { | 398 for (i in 1:opt$training_iterations) { |
422 cmd = paste(base_cmd, "-o", paste(output_base_name, ".tmp.", i, sep=""), sep=" "); | 399 cmd = paste(base_cmd, "-o", paste(output_base_name, ".tmp.", i, sep=""), sep=" "); |
423 cmd = add_output_redirect(cmd, output_log); | 400 cmd = add_output_redirect(cmd, output_log); |