comparison ideas_preprocessor.R @ 21:99102499271a draft

Uploaded
author greg
date Wed, 31 Jan 2018 08:22:43 -0500
parents 504226a7eb75
children 3651f1592f3f
comparison
equal deleted inserted replaced
20:237ee7319452 21:99102499271a
39 s <- paste(s, '<h3>Files prepared for IDEAS</h3>\n', sep=""); 39 s <- paste(s, '<h3>Files prepared for IDEAS</h3>\n', sep="");
40 s <- paste(s, '<ul>\n', sep=""); 40 s <- paste(s, '<ul>\n', sep="");
41 for (i in 1:length(files)) { 41 for (i in 1:length(files)) {
42 s <- paste(s, '<li><a href="', files[i], '">', files[i], '</a></li>\n', sep=""); 42 s <- paste(s, '<li><a href="', files[i], '">', files[i], '</a></li>\n', sep="");
43 } 43 }
44 s <- paste(s, '</ul>\n</body>\n</html>', sep=""); 44 s <- paste(s, '</ul></body></html>', sep="");
45 cat(s, file=output); 45 cat(s, file=output);
46 } 46 }
47
48 tmp_dir = "tmp";
49 output_tmp_dir = paste(opt$output_files_path, tmp_dir, sep="/");
50 dir.create(output_tmp_dir, showWarnings=FALSE);
51 47
52 # Read the ideaspre_input_config text file which has this format: 48 # Read the ideaspre_input_config text file which has this format:
53 # "cell type name" "epigenetic factor name" "file path" "file name" "datatype" 49 # "cell type name" "epigenetic factor name" "file path" "file name" "datatype"
54 ideaspre_input_config = as.matrix(read.table(opt$ideaspre_input_config)); 50 ideaspre_input_config = as.matrix(read.table(opt$ideaspre_input_config));
55 51
73 system(cmd); 69 system(cmd);
74 } else { 70 } else {
75 bigwig_file_name = file_path; 71 bigwig_file_name = file_path;
76 } 72 }
77 bed_file_name = paste(file_name, "bed", sep="."); 73 bed_file_name = paste(file_name, "bed", sep=".");
78 bed_file_path = paste("tmp", bed_file_name, sep="/"); 74 cmd = paste("bigWigAverageOverBed", bigwig_file_name, opt$chrom_bed_input, "stdout | cut -f5 >", bed_file_name);
79 cmd = paste("bigWigAverageOverBed", bigwig_file_name, opt$chrom_bed_input, "stdout | cut -f5 >", bed_file_path);
80 system(cmd); 75 system(cmd);
81 cmd = paste("gzip -f", bed_file_path); 76 cmd = paste("gzip -f", bed_file_name);
82 system(cmd); 77 system(cmd);
83 } 78 }
84 } 79 }
85 80
86 # Create file1.txt. 81 # Create file1.txt.
87 cmd = paste("cut -d' '", opt$ideaspre_input_config, "-f1,2 > file1.txt", sep=" "); 82 cmd = paste("cut -d' '", opt$ideaspre_input_config, "-f1,2 > file1.txt", sep=" ");
88 system(cmd); 83 system(cmd);
89 # Compress the bed files in the tmp directory. 84 # Compress the bed files and create file2.txt.
90 tmp_gzipped_files = paste(tmp_dir, "*.bed.gz", sep="/"); 85 cmd = "ls *.bed.gz > file2.txt";
91 # Create file2.txt.
92 cmd = paste("ls", tmp_gzipped_files, "> file2.txt", sep=" ");
93 system(cmd); 86 system(cmd);
94 # Create IDEAS_input_config.txt with the format required by IDEAS. 87 # Create IDEAS_input_config.txt with the format required by IDEAS.
95 ideas_input_config = "IDEAS_input_config.txt" 88 ideas_input_config = "IDEAS_input_config.txt"
96 cmd = paste("paste -d' ' file1.txt file2.txt >", ideas_input_config, sep=" " ); 89 cmd = paste("paste -d' ' file1.txt file2.txt >", ideas_input_config, sep=" " );
97 system(cmd); 90 system(cmd);
98 # Move IDEAS_input_config.txt to the output directory. 91 # Move IDEAS_input_config.txt to the output directory.
99 to_path = paste(opt$output_files_path, ideas_input_config, sep="/"); 92 to_path = paste(opt$output_files_path, ideas_input_config, sep="/");
100 file.rename(ideas_input_config, to_path); 93 file.rename(ideas_input_config, to_path);
101 # Move the compressed bed files in the tmp 94 # Handle optional chrom_bed_input.txt and chromosomes.bed files.
102 # directory to the output tmp directory.
103 tmp_files = list.files(path=tmp_dir);
104 for (i in 1:length(tmp_files)) {
105 from_path = paste(tmp_dir, tmp_files[i], sep="/");
106 to_path = paste(output_tmp_dir, tmp_files[i], sep="/");
107 file.rename(from_path, to_path);
108 }
109 if (!is.null(opt$chrom_bed_input) && !is.null(opt$chromosome_windows)) { 95 if (!is.null(opt$chrom_bed_input) && !is.null(opt$chromosome_windows)) {
110 # Rename opt$chrom_bed_input to be chromosomes.bed 96 # Rename opt$chrom_bed_input to be chromosomes.bed
111 # and make a copy of it in the output directory. 97 # and make a copy of it in the output directory.
112 to_path = paste(opt$output_files_path, "chromosomes.bed", sep="/"); 98 to_path = paste(opt$output_files_path, "chromosomes.bed", sep="/");
113 file.copy(opt$chrom_bed_input, to_path); 99 file.copy(opt$chrom_bed_input, to_path);