Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.cpp @ 1:bec36315bd12 default tip
Deleted selected files
| author | aaronquinlan |
|---|---|
| date | Sat, 19 Nov 2011 14:17:03 -0500 |
| parents | dfcd8b6c1bda |
| children |
comparison
equal
deleted
inserted
replaced
| 0:dfcd8b6c1bda | 1:bec36315bd12 |
|---|---|
| 1 /***************************************************************************** | |
| 2 unionBedGraphs.cpp | |
| 3 | |
| 4 (c) 2010 - Assaf Gordon, CSHL | |
| 5 - Aaron Quinlan, UVA | |
| 6 Hall Laboratory | |
| 7 Department of Biochemistry and Molecular Genetics | |
| 8 University of Virginia | |
| 9 aaronquinlan@gmail.com | |
| 10 | |
| 11 Licenced under the GNU General Public License 2.0 license. | |
| 12 ******************************************************************************/ | |
| 13 #include <cassert> | |
| 14 #include <cstring> | |
| 15 #include <cstdlib> | |
| 16 #include <iostream> | |
| 17 #include <algorithm> | |
| 18 | |
| 19 #include "bedGraphFile.h" | |
| 20 #include "unionBedGraphs.h" | |
| 21 | |
| 22 using namespace std; | |
| 23 | |
| 24 | |
| 25 UnionBedGraphs::UnionBedGraphs(std::ostream& _output, | |
| 26 const vector<string>& _filenames, | |
| 27 const vector<string>& _titles, | |
| 28 bool _print_empty_regions, | |
| 29 const std::string& _genome_size_filename, | |
| 30 const std::string& _no_coverage_value ) : | |
| 31 filenames(_filenames), | |
| 32 titles(_titles), | |
| 33 output(_output), | |
| 34 current_non_zero_inputs(0), | |
| 35 print_empty_regions(_print_empty_regions), | |
| 36 genome_sizes(NULL), | |
| 37 no_coverage_value(_no_coverage_value) | |
| 38 { | |
| 39 if (print_empty_regions) { | |
| 40 assert(!_genome_size_filename.empty()); | |
| 41 | |
| 42 genome_sizes = new GenomeFile(_genome_size_filename); | |
| 43 } | |
| 44 } | |
| 45 | |
| 46 | |
| 47 UnionBedGraphs::~UnionBedGraphs() { | |
| 48 CloseBedgraphFiles(); | |
| 49 if (genome_sizes) { | |
| 50 delete genome_sizes; | |
| 51 genome_sizes = NULL ; | |
| 52 } | |
| 53 } | |
| 54 | |
| 55 | |
| 56 void UnionBedGraphs::Union() { | |
| 57 OpenBedgraphFiles(); | |
| 58 | |
| 59 // Add the first interval from each file | |
| 60 for(size_t i=0;i<bedgraph_files.size();++i) | |
| 61 LoadNextBedgraphItem(i); | |
| 62 | |
| 63 // Chromosome loop - once per chromosome | |
| 64 do { | |
| 65 // Find the first chromosome to use | |
| 66 current_chrom = DetermineNextChrom(); | |
| 67 | |
| 68 // Populate the queue with initial values from all files | |
| 69 // (if they belong to the correct chromosome) | |
| 70 for(size_t i=0;i<bedgraph_files.size();++i) | |
| 71 AddInterval(i); | |
| 72 | |
| 73 CHRPOS current_start = ConsumeNextCoordinate(); | |
| 74 | |
| 75 // User wanted empty regions, and the first coordinate is not 0 - print a dummy empty coverage | |
| 76 if (print_empty_regions && current_start > 0) | |
| 77 PrintEmptyCoverage(0,current_start); | |
| 78 | |
| 79 // Intervals loop - until all intervals (of current chromosome) from all files are used. | |
| 80 do { | |
| 81 CHRPOS current_end = queue.top().coord; | |
| 82 PrintCoverage(current_start, current_end); | |
| 83 current_start = ConsumeNextCoordinate(); | |
| 84 } while (!queue.empty()); | |
| 85 | |
| 86 // User wanted empty regions, and the last coordinate is not the last coordinate of the chromosome | |
| 87 // print a dummy empty coverage | |
| 88 if (print_empty_regions) { | |
| 89 CHRPOS chrom_size = genome_sizes->getChromSize(current_chrom); | |
| 90 if (current_start < chrom_size) | |
| 91 PrintEmptyCoverage(current_start, chrom_size); | |
| 92 } | |
| 93 | |
| 94 } while (!AllFilesDone()); | |
| 95 } | |
| 96 | |
| 97 | |
| 98 CHRPOS UnionBedGraphs::ConsumeNextCoordinate() { | |
| 99 assert(!queue.empty()); | |
| 100 | |
| 101 CHRPOS new_position = queue.top().coord; | |
| 102 do { | |
| 103 IntervalItem item = queue.top(); | |
| 104 UpdateInformation(item); | |
| 105 queue.pop(); | |
| 106 } while (!queue.empty() && queue.top().coord == new_position); | |
| 107 | |
| 108 return new_position; | |
| 109 } | |
| 110 | |
| 111 | |
| 112 void UnionBedGraphs::UpdateInformation(const IntervalItem &item) { | |
| 113 // Update the depth coverage for this file | |
| 114 | |
| 115 // Which coordinate is it - start or end? | |
| 116 switch (item.coord_type) | |
| 117 { | |
| 118 case START: | |
| 119 current_depth[item.source_index] = item.depth; | |
| 120 current_non_zero_inputs++; | |
| 121 break; | |
| 122 case END: | |
| 123 //Read the next interval from this file | |
| 124 AddInterval(item.source_index); | |
| 125 current_depth[item.source_index] = no_coverage_value; | |
| 126 current_non_zero_inputs--; | |
| 127 break; | |
| 128 default: | |
| 129 assert(0); | |
| 130 } | |
| 131 } | |
| 132 | |
| 133 | |
| 134 void UnionBedGraphs::PrintHeader() { | |
| 135 output << "chrom\tstart\tend" ; | |
| 136 for (size_t i=0;i<titles.size();++i) | |
| 137 output << "\t" <<titles[i]; | |
| 138 output << endl; | |
| 139 } | |
| 140 | |
| 141 | |
| 142 void UnionBedGraphs::PrintCoverage(CHRPOS start, CHRPOS end) { | |
| 143 if ( current_non_zero_inputs == 0 && ! print_empty_regions ) | |
| 144 return ; | |
| 145 | |
| 146 output << current_chrom << "\t" | |
| 147 << start << "\t" | |
| 148 << end; | |
| 149 | |
| 150 for (size_t i=0;i<current_depth.size();++i) | |
| 151 output << "\t" << current_depth[i] ; | |
| 152 | |
| 153 output << endl; | |
| 154 } | |
| 155 | |
| 156 | |
| 157 void UnionBedGraphs::PrintEmptyCoverage(CHRPOS start, CHRPOS end) { | |
| 158 output << current_chrom << "\t" | |
| 159 << start << "\t" | |
| 160 << end; | |
| 161 | |
| 162 for (size_t i=0;i<current_depth.size();++i) | |
| 163 output << "\t" << no_coverage_value ; | |
| 164 | |
| 165 output << endl; | |
| 166 } | |
| 167 | |
| 168 | |
| 169 void UnionBedGraphs::LoadNextBedgraphItem(int index) { | |
| 170 assert(static_cast<unsigned int>(index) < bedgraph_files.size()); | |
| 171 | |
| 172 current_bedgraph_item[index].chrom=""; | |
| 173 | |
| 174 BedGraphFile *file = bedgraph_files[index]; | |
| 175 BEDGRAPH_STR bg; | |
| 176 int lineNum = 0; | |
| 177 BedGraphLineStatus status; | |
| 178 | |
| 179 while ( (status = file->GetNextBedGraph(bg, lineNum)) != BEDGRAPH_INVALID ) { | |
| 180 if (status != BEDGRAPH_VALID) | |
| 181 continue; | |
| 182 | |
| 183 current_bedgraph_item[index] = bg ; | |
| 184 break; | |
| 185 } | |
| 186 } | |
| 187 | |
| 188 | |
| 189 bool UnionBedGraphs::AllFilesDone() { | |
| 190 for (size_t i=0;i<current_bedgraph_item.size();++i) | |
| 191 if (!current_bedgraph_item[i].chrom.empty()) | |
| 192 return false; | |
| 193 return true; | |
| 194 } | |
| 195 | |
| 196 | |
| 197 string UnionBedGraphs::DetermineNextChrom() { | |
| 198 string next_chrom; | |
| 199 for (size_t i=0;i<current_bedgraph_item.size();++i) { | |
| 200 if (current_bedgraph_item[i].chrom.empty()) | |
| 201 continue; | |
| 202 | |
| 203 if (next_chrom.empty()) | |
| 204 next_chrom = current_bedgraph_item[i].chrom; | |
| 205 else | |
| 206 if (current_bedgraph_item[i].chrom < next_chrom) | |
| 207 next_chrom = current_bedgraph_item[i].chrom ; | |
| 208 } | |
| 209 return next_chrom; | |
| 210 } | |
| 211 | |
| 212 | |
| 213 void UnionBedGraphs::AddInterval(int index) { | |
| 214 assert(static_cast<unsigned int>(index) < bedgraph_files.size()); | |
| 215 | |
| 216 //This file has no more intervals | |
| 217 if (current_bedgraph_item[index].chrom.empty()) | |
| 218 return ; | |
| 219 | |
| 220 //If the next interval belongs to a different chrom, don't add it | |
| 221 if (current_bedgraph_item[index].chrom!=current_chrom) | |
| 222 return ; | |
| 223 | |
| 224 const BEDGRAPH_STR &bg(current_bedgraph_item[index]); | |
| 225 | |
| 226 IntervalItem start_item(index, START, bg.start, bg.depth); | |
| 227 IntervalItem end_item(index, END, bg.end, bg.depth); | |
| 228 | |
| 229 queue.push(start_item); | |
| 230 queue.push(end_item); | |
| 231 | |
| 232 LoadNextBedgraphItem(index); | |
| 233 } | |
| 234 | |
| 235 | |
| 236 void UnionBedGraphs::OpenBedgraphFiles() { | |
| 237 for (size_t i=0;i<filenames.size();++i) { | |
| 238 BedGraphFile *file = new BedGraphFile(filenames[i]); | |
| 239 file->Open(); | |
| 240 bedgraph_files.push_back(file); | |
| 241 | |
| 242 current_depth.push_back(no_coverage_value); | |
| 243 } | |
| 244 current_bedgraph_item.resize(filenames.size()); | |
| 245 } | |
| 246 | |
| 247 | |
| 248 void UnionBedGraphs::CloseBedgraphFiles() { | |
| 249 for (size_t i=0;i<bedgraph_files.size();++i) { | |
| 250 BedGraphFile *file = bedgraph_files[i]; | |
| 251 delete file; | |
| 252 bedgraph_files[i] = NULL ; | |
| 253 } | |
| 254 bedgraph_files.clear(); | |
| 255 } |
