comparison ideas_preprocessor.R @ 14:20c21d946a8e draft

Uploaded
author greg
date Wed, 24 Jan 2018 14:04:55 -0500
parents ab0f306504a3
children ce2021cd68d2
comparison
equal deleted inserted replaced
13:4d542da396a7 14:20c21d946a8e
1 #!/usr/bin/env Rscript 1 #!/usr/bin/env Rscript
2
3 # TODO: implement support for the following:
4 # 1. Scenario where user did not select chrom_bed_input
5 # 2. --exclude_bed_input
6 # 3. --bychr
7 # 4. --chrom_len_file
8 # 5. --reads_per_bp
9 # 6. --restrict_to_chroms
10 # 7. --standardize_datasets
11 # 8. Scenario where --window_size is NULL and bamCoverage needs to be handled - see the TODO near line 57.
12
2 13
3 suppressPackageStartupMessages(library("data.table")) 14 suppressPackageStartupMessages(library("data.table"))
4 suppressPackageStartupMessages(library("optparse")) 15 suppressPackageStartupMessages(library("optparse"))
5 16
6 option_list <- list( 17 option_list <- list(
7 make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", defaul=NULL, help="Chromosome windows positions file"), 18 make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", defaul=NULL, help="Chromosome windows positions file"),
8 make_option(c("--exclude_input"), action="store", dest="exclude_input", defaul=NULL, help="File(s) containing regions to exclude"), 19 make_option(c("--exclude_bed_input"), action="store", dest="exclude_bed_input", defaul=NULL, help="File(s) containing regions to exclude"),
9 make_option(c("--bychr"), action="store_true", dest="bychr", defaul=FALSE, help="Separate files by chromosome"), 20 make_option(c("--bychr"), action="store_true", dest="bychr", defaul=FALSE, help="Separate files by chromosome"),
10 make_option(c("--chrom_len_file"), action="store", dest="chrom_len_file", default=NULL, help="Chromosome lengths file"), 21 make_option(c("--chrom_len_file"), action="store", dest="chrom_len_file", default=NULL, help="Chromosome lengths file"),
11 make_option(c("--ideaspre_input_config"), action="store", dest="ideaspre_input_config", help="Preprocessing input config file"), 22 make_option(c("--ideaspre_input_config"), action="store", dest="ideaspre_input_config", help="Preprocessing input config file"),
12 make_option(c("--output"), action="store", dest="output", help="Primary output dataset"), 23 make_option(c("--output"), action="store", dest="output", help="Primary output dataset"),
13 make_option(c("--output_files_path"), action="store", dest="output_files_path", help="Primary output dataset extra files path"), 24 make_option(c("--output_files_path"), action="store", dest="output_files_path", help="Primary output dataset extra files path"),
40 dir.create(output_tmp_dir, showWarnings=FALSE); 51 dir.create(output_tmp_dir, showWarnings=FALSE);
41 52
42 # Read the ideaspre_input_config text file which has this format: 53 # Read the ideaspre_input_config text file which has this format:
43 # "cell type name" "epigenetic factor name" "file path" "file name" "datatype" 54 # "cell type name" "epigenetic factor name" "file path" "file name" "datatype"
44 ideaspre_input_config = as.matrix(read.table(opt$ideaspre_input_config)); 55 ideaspre_input_config = as.matrix(read.table(opt$ideaspre_input_config));
56
57 # TODO: fix this - 500 is a hard-coded fallback used when --window_size is not supplied
58 window_size = opt$window_size
59 if (is.null(opt$window_size)) {
60 window_size = 500;
61 }
45 # Process data to windows mean. 62 # Process data to windows mean.
46 # TODO: implement scenario where user did not select chrom_bed_input.
47 if (!is.null(opt$chrom_bed_input)) { 63 if (!is.null(opt$chrom_bed_input)) {
48 for (i in 1:dim(ideaspre_input_config)[1]) { 64 for (i in 1:dim(ideaspre_input_config)[1]) {
49 file_path = ideaspre_input_config[i, 3] 65 file_path = ideaspre_input_config[i, 3]
50 file_name = ideaspre_input_config[i, 4] 66 file_name = ideaspre_input_config[i, 4]
51 datatype = ideaspre_input_config[i, 5] 67 datatype = ideaspre_input_config[i, 5]
52 if (datatype == "bam") { 68 if (datatype == "bam") {
53 cmd = paste("samtools index", file_path); 69 cmd = paste("samtools index", file_path);
54 system(cmd); 70 system(cmd);
55 bigwig_file_name = paste(file_name, "bw", sep="."); 71 bigwig_file_name = paste(file_name, "bw", sep=".");
56 cmd = paste("bamCoverage --bam", file_path, "-o", bigwig_file_name, "--binSize", opt$window_size); 72
73 cmd = paste("bamCoverage --bam", file_path, "-o", bigwig_file_name, "--binSize", window_size);
57 system(cmd); 74 system(cmd);
58 } else { 75 } else {
59 bigwig_file_name = file_path; 76 bigwig_file_name = file_path;
60 } 77 }
61 bed_file_name = paste(file_name, "bed", sep="."); 78 bed_file_name = paste(file_name, "bed", sep=".");