comparison BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfcd8b6c1bda
1 /*****************************************************************************
2 unionBedGraphs.cpp
3
4 (c) 2010 - Assaf Gordon, CSHL
5 - Aaron Quinlan, UVA
6 Hall Laboratory
7 Department of Biochemistry and Molecular Genetics
8 University of Virginia
9 aaronquinlan@gmail.com
10
11 Licenced under the GNU General Public License 2.0 license.
12 ******************************************************************************/
13 #include <cassert>
14 #include <cstring>
15 #include <cstdlib>
16 #include <iostream>
17 #include <algorithm>
18
19 #include "bedGraphFile.h"
20 #include "unionBedGraphs.h"
21
22 using namespace std;
23
24
25 UnionBedGraphs::UnionBedGraphs(std::ostream& _output,
26 const vector<string>& _filenames,
27 const vector<string>& _titles,
28 bool _print_empty_regions,
29 const std::string& _genome_size_filename,
30 const std::string& _no_coverage_value ) :
31 filenames(_filenames),
32 titles(_titles),
33 output(_output),
34 current_non_zero_inputs(0),
35 print_empty_regions(_print_empty_regions),
36 genome_sizes(NULL),
37 no_coverage_value(_no_coverage_value)
38 {
39 if (print_empty_regions) {
40 assert(!_genome_size_filename.empty());
41
42 genome_sizes = new GenomeFile(_genome_size_filename);
43 }
44 }
45
46
47 UnionBedGraphs::~UnionBedGraphs() {
48 CloseBedgraphFiles();
49 if (genome_sizes) {
50 delete genome_sizes;
51 genome_sizes = NULL ;
52 }
53 }
54
55
56 void UnionBedGraphs::Union() {
57 OpenBedgraphFiles();
58
59 // Add the first interval from each file
60 for(size_t i=0;i<bedgraph_files.size();++i)
61 LoadNextBedgraphItem(i);
62
63 // Chromosome loop - once per chromosome
64 do {
65 // Find the first chromosome to use
66 current_chrom = DetermineNextChrom();
67
68 // Populate the queue with initial values from all files
69 // (if they belong to the correct chromosome)
70 for(size_t i=0;i<bedgraph_files.size();++i)
71 AddInterval(i);
72
73 CHRPOS current_start = ConsumeNextCoordinate();
74
75 // User wanted empty regions, and the first coordinate is not 0 - print a dummy empty coverage
76 if (print_empty_regions && current_start > 0)
77 PrintEmptyCoverage(0,current_start);
78
79 // Intervals loop - until all intervals (of current chromosome) from all files are used.
80 do {
81 CHRPOS current_end = queue.top().coord;
82 PrintCoverage(current_start, current_end);
83 current_start = ConsumeNextCoordinate();
84 } while (!queue.empty());
85
86 // User wanted empty regions, and the last coordinate is not the last coordinate of the chromosome
87 // print a dummy empty coverage
88 if (print_empty_regions) {
89 CHRPOS chrom_size = genome_sizes->getChromSize(current_chrom);
90 if (current_start < chrom_size)
91 PrintEmptyCoverage(current_start, chrom_size);
92 }
93
94 } while (!AllFilesDone());
95 }
96
97
98 CHRPOS UnionBedGraphs::ConsumeNextCoordinate() {
99 assert(!queue.empty());
100
101 CHRPOS new_position = queue.top().coord;
102 do {
103 IntervalItem item = queue.top();
104 UpdateInformation(item);
105 queue.pop();
106 } while (!queue.empty() && queue.top().coord == new_position);
107
108 return new_position;
109 }
110
111
112 void UnionBedGraphs::UpdateInformation(const IntervalItem &item) {
113 // Update the depth coverage for this file
114
115 // Which coordinate is it - start or end?
116 switch (item.coord_type)
117 {
118 case START:
119 current_depth[item.source_index] = item.depth;
120 current_non_zero_inputs++;
121 break;
122 case END:
123 //Read the next interval from this file
124 AddInterval(item.source_index);
125 current_depth[item.source_index] = no_coverage_value;
126 current_non_zero_inputs--;
127 break;
128 default:
129 assert(0);
130 }
131 }
132
133
134 void UnionBedGraphs::PrintHeader() {
135 output << "chrom\tstart\tend" ;
136 for (size_t i=0;i<titles.size();++i)
137 output << "\t" <<titles[i];
138 output << endl;
139 }
140
141
142 void UnionBedGraphs::PrintCoverage(CHRPOS start, CHRPOS end) {
143 if ( current_non_zero_inputs == 0 && ! print_empty_regions )
144 return ;
145
146 output << current_chrom << "\t"
147 << start << "\t"
148 << end;
149
150 for (size_t i=0;i<current_depth.size();++i)
151 output << "\t" << current_depth[i] ;
152
153 output << endl;
154 }
155
156
157 void UnionBedGraphs::PrintEmptyCoverage(CHRPOS start, CHRPOS end) {
158 output << current_chrom << "\t"
159 << start << "\t"
160 << end;
161
162 for (size_t i=0;i<current_depth.size();++i)
163 output << "\t" << no_coverage_value ;
164
165 output << endl;
166 }
167
168
169 void UnionBedGraphs::LoadNextBedgraphItem(int index) {
170 assert(static_cast<unsigned int>(index) < bedgraph_files.size());
171
172 current_bedgraph_item[index].chrom="";
173
174 BedGraphFile *file = bedgraph_files[index];
175 BEDGRAPH_STR bg;
176 int lineNum = 0;
177 BedGraphLineStatus status;
178
179 while ( (status = file->GetNextBedGraph(bg, lineNum)) != BEDGRAPH_INVALID ) {
180 if (status != BEDGRAPH_VALID)
181 continue;
182
183 current_bedgraph_item[index] = bg ;
184 break;
185 }
186 }
187
188
189 bool UnionBedGraphs::AllFilesDone() {
190 for (size_t i=0;i<current_bedgraph_item.size();++i)
191 if (!current_bedgraph_item[i].chrom.empty())
192 return false;
193 return true;
194 }
195
196
197 string UnionBedGraphs::DetermineNextChrom() {
198 string next_chrom;
199 for (size_t i=0;i<current_bedgraph_item.size();++i) {
200 if (current_bedgraph_item[i].chrom.empty())
201 continue;
202
203 if (next_chrom.empty())
204 next_chrom = current_bedgraph_item[i].chrom;
205 else
206 if (current_bedgraph_item[i].chrom < next_chrom)
207 next_chrom = current_bedgraph_item[i].chrom ;
208 }
209 return next_chrom;
210 }
211
212
213 void UnionBedGraphs::AddInterval(int index) {
214 assert(static_cast<unsigned int>(index) < bedgraph_files.size());
215
216 //This file has no more intervals
217 if (current_bedgraph_item[index].chrom.empty())
218 return ;
219
220 //If the next interval belongs to a different chrom, don't add it
221 if (current_bedgraph_item[index].chrom!=current_chrom)
222 return ;
223
224 const BEDGRAPH_STR &bg(current_bedgraph_item[index]);
225
226 IntervalItem start_item(index, START, bg.start, bg.depth);
227 IntervalItem end_item(index, END, bg.end, bg.depth);
228
229 queue.push(start_item);
230 queue.push(end_item);
231
232 LoadNextBedgraphItem(index);
233 }
234
235
236 void UnionBedGraphs::OpenBedgraphFiles() {
237 for (size_t i=0;i<filenames.size();++i) {
238 BedGraphFile *file = new BedGraphFile(filenames[i]);
239 file->Open();
240 bedgraph_files.push_back(file);
241
242 current_depth.push_back(no_coverage_value);
243 }
244 current_bedgraph_item.resize(filenames.size());
245 }
246
247
248 void UnionBedGraphs::CloseBedgraphFiles() {
249 for (size_t i=0;i<bedgraph_files.size();++i) {
250 BedGraphFile *file = bedgraph_files[i];
251 delete file;
252 bedgraph_files[i] = NULL ;
253 }
254 bedgraph_files.clear();
255 }