comparison ideas_preprocessor.R @ 22:3651f1592f3f draft

Uploaded
author greg
date Wed, 31 Jan 2018 14:13:11 -0500
parents 99102499271a
children f7563bb242fc
comparison
equal deleted inserted replaced
21:99102499271a 22:3651f1592f3f
43 } 43 }
44 s <- paste(s, '</ul></body></html>', sep=""); 44 s <- paste(s, '</ul></body></html>', sep="");
45 cat(s, file=output); 45 cat(s, file=output);
46 } 46 }
47 47
48 tmp_dir = "tmp";
49
48 # Read the ideaspre_input_config text file which has this format: 50 # Read the ideaspre_input_config text file which has this format:
49 # "cell type name" "epigenetic factor name" "file path" "file name" "datatype" 51 # "cell type name" "epigenetic factor name" "file path" "file name" "datatype"
50 ideaspre_input_config = as.matrix(read.table(opt$ideaspre_input_config)); 52 ideaspre_input_config = as.matrix(read.table(opt$ideaspre_input_config));
51 53
52 # TODO: fix this 54 # TODO: fix this
69 system(cmd); 71 system(cmd);
70 } else { 72 } else {
71 bigwig_file_name = file_path; 73 bigwig_file_name = file_path;
72 } 74 }
73 bed_file_name = paste(file_name, "bed", sep="."); 75 bed_file_name = paste(file_name, "bed", sep=".");
74 cmd = paste("bigWigAverageOverBed", bigwig_file_name, opt$chrom_bed_input, "stdout | cut -f5 >", bed_file_name); 76 bed_file_path = paste("tmp", bed_file_name, sep="/");
77 cmd = paste("bigWigAverageOverBed", bigwig_file_name, opt$chrom_bed_input, "stdout | cut -f5 >", bed_file_path);
75 system(cmd); 78 system(cmd);
76 cmd = paste("gzip -f", bed_file_name); 79 cmd = paste("gzip -f", bed_file_path);
77 system(cmd); 80 system(cmd);
78 } 81 }
79 } 82 }
80 83
81 # Create file1.txt. 84 # Create file1.txt.
82 cmd = paste("cut -d' '", opt$ideaspre_input_config, "-f1,2 > file1.txt", sep=" "); 85 cmd = paste("cut -d' '", opt$ideaspre_input_config, "-f1,2 > file1.txt", sep=" ");
83 system(cmd); 86 system(cmd);
84 # Compress the bed files and create file2.txt. 87 # Compress the bed files in the tmp directory.
85 cmd = "ls *.bed.gz > file2.txt"; 88 tmp_gzipped_files = paste(tmp_dir, "*.bed.gz", sep="/");
89 # Create file2.txt.
90 cmd = paste("ls", tmp_gzipped_files, "> file2.txt", sep=" ");
86 system(cmd); 91 system(cmd);
87 # Create IDEAS_input_config.txt with the format required by IDEAS. 92 # Create IDEAS_input_config.txt with the format required by IDEAS.
88 ideas_input_config = "IDEAS_input_config.txt" 93 ideas_input_config = "IDEAS_input_config.txt"
89 cmd = paste("paste -d' ' file1.txt file2.txt >", ideas_input_config, sep=" " ); 94 cmd = paste("paste -d' ' file1.txt file2.txt >", ideas_input_config, sep=" " );
90 system(cmd); 95 system(cmd);
91 # Move IDEAS_input_config.txt to the output directory. 96 # Move IDEAS_input_config.txt to the output directory.
92 to_path = paste(opt$output_files_path, ideas_input_config, sep="/"); 97 to_path = paste(opt$output_files_path, ideas_input_config, sep="/");
93 file.rename(ideas_input_config, to_path); 98 file.rename(ideas_input_config, to_path);
94 # Handle optional chrom_bed_input.txt and chromosomes.bed files. 99 # Archive the tmp directory.
100 cmd = "tar -cvf tmp.tar tmp";
101 system(cmd);
102 # Move the tmp archive to the output directory.
103 to_path = paste(opt$output_files_path, "tmp.tar", sep="/");
104 file.rename("tmp.tar", to_path);
105
95 if (!is.null(opt$chrom_bed_input) && !is.null(opt$chromosome_windows)) { 106 if (!is.null(opt$chrom_bed_input) && !is.null(opt$chromosome_windows)) {
96 # Rename opt$chrom_bed_input to be chromosomes.bed 107 # Rename opt$chrom_bed_input to be chromosomes.bed
97 # and make a copy of it in the output directory. 108 # and make a copy of it in the output directory.
98 to_path = paste(opt$output_files_path, "chromosomes.bed", sep="/"); 109 to_path = paste(opt$output_files_path, "chromosomes.bed", sep="/");
99 file.copy(opt$chrom_bed_input, to_path); 110 file.copy(opt$chrom_bed_input, to_path);