Mercurial > repos > greg > ideas_preprocessor
comparison ideas_preprocessor.R @ 14:20c21d946a8e draft
Uploaded
author | greg |
---|---|
date | Wed, 24 Jan 2018 14:04:55 -0500 |
parents | ab0f306504a3 |
children | ce2021cd68d2 |
comparison
equal
deleted
inserted
replaced
13:4d542da396a7 | 14:20c21d946a8e |
---|---|
1 #!/usr/bin/env Rscript | 1 #!/usr/bin/env Rscript |
2 | |
3 # TODO: implement support for the following: | |
4 # 1. Scenario where user did not select chrom_bed_input | |
5 # 2. --exclude_bed_input | |
6 # 3. --bychr | |
7 # 4. --chrom_len_file | |
8 # 5. --reads_per_bp | |
9 # 6. --restrict_to_chroms | |
10 # 7. --standardize_datasets | |
11 # 8. Scenario where --window_size is NULL and the bamCoverage call needs to handle it - see the TODO near line 57. | |
12 | |
2 | 13 |
3 suppressPackageStartupMessages(library("data.table")) | 14 suppressPackageStartupMessages(library("data.table")) |
4 suppressPackageStartupMessages(library("optparse")) | 15 suppressPackageStartupMessages(library("optparse")) |
5 | 16 |
6 option_list <- list( | 17 option_list <- list( |
7 make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", defaul=NULL, help="Chromosome windows positions file"), | 18 make_option(c("--chrom_bed_input"), action="store", dest="chrom_bed_input", defaul=NULL, help="Chromosome windows positions file"), |
8 make_option(c("--exclude_input"), action="store", dest="exclude_input", defaul=NULL, help="File(s) containing regions to exclude"), | 19 make_option(c("--exclude_bed_input"), action="store", dest="exclude_bed_input", defaul=NULL, help="File(s) containing regions to exclude"), |
9 make_option(c("--bychr"), action="store_true", dest="bychr", defaul=FALSE, help="Separate files by chromosome"), | 20 make_option(c("--bychr"), action="store_true", dest="bychr", defaul=FALSE, help="Separate files by chromosome"), |
10 make_option(c("--chrom_len_file"), action="store", dest="chrom_len_file", default=NULL, help="Chromosome lengths file"), | 21 make_option(c("--chrom_len_file"), action="store", dest="chrom_len_file", default=NULL, help="Chromosome lengths file"), |
11 make_option(c("--ideaspre_input_config"), action="store", dest="ideaspre_input_config", help="Preprocessing input config file"), | 22 make_option(c("--ideaspre_input_config"), action="store", dest="ideaspre_input_config", help="Preprocessing input config file"), |
12 make_option(c("--output"), action="store", dest="output", help="Primary output dataset"), | 23 make_option(c("--output"), action="store", dest="output", help="Primary output dataset"), |
13 make_option(c("--output_files_path"), action="store", dest="output_files_path", help="Primary output dataset extra files path"), | 24 make_option(c("--output_files_path"), action="store", dest="output_files_path", help="Primary output dataset extra files path"), |
40 dir.create(output_tmp_dir, showWarnings=FALSE); | 51 dir.create(output_tmp_dir, showWarnings=FALSE); |
41 | 52 |
42 # Read the ideaspre_input_config text file which has this format: | 53 # Read the ideaspre_input_config text file which has this format: |
43 # "cell type name" "epigenetic factor name" "file path" "file name" "datatype" | 54 # "cell type name" "epigenetic factor name" "file path" "file name" "datatype" |
44 ideaspre_input_config = as.matrix(read.table(opt$ideaspre_input_config)); | 55 ideaspre_input_config = as.matrix(read.table(opt$ideaspre_input_config)); |
56 | |
57 # TODO: fix this - window_size falls back to a hard-coded 500 when --window_size is not supplied. | |
58 window_size = opt$window_size | |
59 if (is.null(opt$window_size)) { | |
60 window_size = 500; | |
61 } | |
45 # Process data to windows mean. | 62 # Process data to windows mean. |
46 # TODO: implement scenario where user did not select chrom_bed_input. | |
47 if (!is.null(opt$chrom_bed_input)) { | 63 if (!is.null(opt$chrom_bed_input)) { |
48 for (i in 1:dim(ideaspre_input_config)[1]) { | 64 for (i in 1:dim(ideaspre_input_config)[1]) { |
49 file_path = ideaspre_input_config[i, 3] | 65 file_path = ideaspre_input_config[i, 3] |
50 file_name = ideaspre_input_config[i, 4] | 66 file_name = ideaspre_input_config[i, 4] |
51 datatype = ideaspre_input_config[i, 5] | 67 datatype = ideaspre_input_config[i, 5] |
52 if (datatype == "bam") { | 68 if (datatype == "bam") { |
53 cmd = paste("samtools index", file_path); | 69 cmd = paste("samtools index", file_path); |
54 system(cmd); | 70 system(cmd); |
55 bigwig_file_name = paste(file_name, "bw", sep="."); | 71 bigwig_file_name = paste(file_name, "bw", sep="."); |
56 cmd = paste("bamCoverage --bam", file_path, "-o", bigwig_file_name, "--binSize", opt$window_size); | 72 |
73 cmd = paste("bamCoverage --bam", file_path, "-o", bigwig_file_name, "--binSize", window_size); | |
57 system(cmd); | 74 system(cmd); |
58 } else { | 75 } else { |
59 bigwig_file_name = file_path; | 76 bigwig_file_name = file_path; |
60 } | 77 } |
61 bed_file_name = paste(file_name, "bed", sep="."); | 78 bed_file_name = paste(file_name, "bed", sep="."); |