Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfcd8b6c1bda |
---|---|
1 /***************************************************************************** | |
2 bedFile.cpp | |
3 | |
4 (c) 2009 - Aaron Quinlan | |
5 Hall Laboratory | |
6 Department of Biochemistry and Molecular Genetics | |
7 University of Virginia | |
8 aaronquinlan@gmail.com | |
9 | |
10 Licensed under the GNU General Public License 2.0 license. | |
11 ******************************************************************************/ | |
12 #include "bedFile.h" | |
13 | |
14 | |
15 /************************************************ | |
16 Helper functions | |
17 *************************************************/ | |
18 void splitBedIntoBlocks(const BED &bed, int lineNum, bedVector &bedBlocks) { | |
19 | |
20 if (bed.otherFields.size() < 6) { | |
21 cerr << "Input error: Cannot split into blocks. Found interval with fewer than 12 columns on line " << lineNum << "." << endl; | |
22 exit(1); | |
23 } | |
24 | |
25 int blockCount = atoi(bed.otherFields[3].c_str()); | |
26 if ( blockCount <= 0 ) { | |
27 cerr << "Input error: found interval having <= 0 blocks on line " << lineNum << "." << endl; | |
28 exit(1); | |
29 } | |
30 else if ( blockCount == 1 ) { | |
31 //take a short-cut for single blocks | |
32 bedBlocks.push_back(bed); | |
33 } | |
34 else { | |
35 // get the comma-delimited strings for the BED12 block starts and block ends. | |
36 string blockSizes(bed.otherFields[4]); | |
37 string blockStarts(bed.otherFields[5]); | |
38 | |
39 vector<int> sizes; | |
40 vector<int> starts; | |
41 Tokenize(blockSizes, sizes, ","); | |
42 Tokenize(blockStarts, starts, ","); | |
43 | |
44 if ( sizes.size() != (size_t) blockCount || starts.size() != (size_t) blockCount ) { | |
45 cerr << "Input error: found interval with block-counts not matching starts/sizes on line " << lineNum << "." << endl; | |
46 exit(1); | |
47 } | |
48 | |
49 // add each BED block to the bedBlocks vector | |
50 for (UINT i = 0; i < (UINT) blockCount; ++i) { | |
51 CHRPOS blockStart = bed.start + starts[i]; | |
52 CHRPOS blockEnd = bed.start + starts[i] + sizes[i]; | |
53 BED currBedBlock(bed.chrom, blockStart, blockEnd, bed.name, bed.score, bed.strand, bed.otherFields); | |
54 bedBlocks.push_back(currBedBlock); | |
55 } | |
56 } | |
57 } | |
58 | |
59 | |
60 /*********************************************** | |
61 Sorting comparison functions | |
62 ************************************************/ | |
63 bool sortByChrom(BED const &a, BED const &b) { | |
64 if (a.chrom < b.chrom) return true; | |
65 else return false; | |
66 }; | |
67 | |
68 bool sortByStart(const BED &a, const BED &b) { | |
69 if (a.start < b.start) return true; | |
70 else return false; | |
71 }; | |
72 | |
73 bool sortBySizeAsc(const BED &a, const BED &b) { | |
74 | |
75 CHRPOS aLen = a.end - a.start; | |
76 CHRPOS bLen = b.end - b.start; | |
77 | |
78 if (aLen < bLen) return true; | |
79 else return false; | |
80 }; | |
81 | |
82 bool sortBySizeDesc(const BED &a, const BED &b) { | |
83 | |
84 CHRPOS aLen = a.end - a.start; | |
85 CHRPOS bLen = b.end - b.start; | |
86 | |
87 if (aLen > bLen) return true; | |
88 else return false; | |
89 }; | |
90 | |
91 bool sortByScoreAsc(const BED &a, const BED &b) { | |
92 if (a.score < b.score) return true; | |
93 else return false; | |
94 }; | |
95 | |
96 bool sortByScoreDesc(const BED &a, const BED &b) { | |
97 if (a.score > b.score) return true; | |
98 else return false; | |
99 }; | |
100 | |
101 bool byChromThenStart(BED const &a, BED const &b) { | |
102 | |
103 if (a.chrom < b.chrom) return true; | |
104 else if (a.chrom > b.chrom) return false; | |
105 | |
106 if (a.start < b.start) return true; | |
107 else if (a.start >= b.start) return false; | |
108 | |
109 return false; | |
110 }; | |
111 | |
112 | |
113 /******************************************* | |
114 Class methods | |
115 *******************************************/ | |
116 | |
117 // Constructor | |
118 BedFile::BedFile(string &bedFile) | |
119 : bedFile(bedFile), | |
120 _isGff(false), | |
121 _isVcf(false), | |
122 _typeIsKnown(false), | |
123 _merged_start(-1), | |
124 _merged_end(-1), | |
125 _merged_chrom(""), | |
126 _prev_start(-1), | |
127 _prev_chrom("") | |
128 {} | |
129 | |
130 // Destructor | |
131 BedFile::~BedFile(void) { | |
132 } | |
133 | |
134 | |
135 void BedFile::Open(void) { | |
136 | |
137 _bedFields.reserve(12); | |
138 | |
139 if (bedFile == "stdin" || bedFile == "-") { | |
140 _bedStream = &cin; | |
141 } | |
142 else { | |
143 _bedStream = new ifstream(bedFile.c_str(), ios::in); | |
144 | |
145 if( isGzipFile(_bedStream) ) { | |
146 delete _bedStream; | |
147 _bedStream = new igzstream(bedFile.c_str(), ios::in); | |
148 } | |
149 if ( !(_bedStream->good()) ) { | |
150 cerr << "Error: The requested bed file (" << bedFile << ") could not be opened. Exiting!" << endl; | |
151 exit (1); | |
152 } | |
153 } | |
154 } | |
155 | |
156 // Rewind the pointer back to the beginning of the file | |
157 void BedFile::Rewind(void) { | |
158 _bedStream->seekg(0, ios::beg); | |
159 } | |
160 | |
161 // Jump to a specific byte in the file | |
162 void BedFile::Seek(unsigned long offset) { | |
163 _bedStream->seekg(offset); | |
164 } | |
165 | |
166 // Jump to a specific byte in the file | |
167 bool BedFile::Empty() { | |
168 return _bedStream->eof(); | |
169 } | |
170 | |
171 | |
172 // Close the BED file | |
173 void BedFile::Close(void) { | |
174 if (bedFile != "stdin" && bedFile != "-") delete _bedStream; | |
175 } | |
176 | |
177 | |
178 BedLineStatus BedFile::GetNextBed(BED &bed, int &lineNum, bool forceSorted) { | |
179 | |
180 // make sure there are still lines to process. | |
181 // if so, tokenize, validate and return the BED entry. | |
182 _bedFields.clear(); | |
183 // clear out the previous bed's data | |
184 if (_bedStream->good()) { | |
185 // parse the bedStream pointer | |
186 getline(*_bedStream, _bedLine); | |
187 lineNum++; | |
188 | |
189 // split into a string vector. | |
190 Tokenize(_bedLine, _bedFields); | |
191 | |
192 // load the BED struct as long as it's a valid BED entry. | |
193 BedLineStatus status = parseLine(bed, _bedFields, lineNum); | |
194 if (!forceSorted) { | |
195 return status; | |
196 } | |
197 else if (status == BED_VALID) { | |
198 if (bed.chrom == _prev_chrom) { | |
199 if ((int) bed.start >= _prev_start) { | |
200 _prev_chrom = bed.chrom; | |
201 _prev_start = bed.start; | |
202 return status; | |
203 } | |
204 else { | |
205 cerr << "ERROR: input file: (" << bedFile << ") is not sorted by chrom then start" << endl; | |
206 exit(1); | |
207 } | |
208 } | |
209 else if (bed.chrom > _prev_chrom) { | |
210 _prev_chrom = bed.chrom; | |
211 _prev_start = bed.start; | |
212 return status; | |
213 } | |
214 else if (bed.chrom < _prev_chrom) { | |
215 cerr << "ERROR: input file: (" << bedFile << ") is not sorted by chrom then start" << endl; | |
216 exit(1); | |
217 } | |
218 } | |
219 } | |
220 | |
221 // default if file is closed or EOF | |
222 return BED_INVALID; | |
223 } | |
224 | |
225 | |
226 bool BedFile::GetNextMergedBed(BED &merged_bed, int &lineNum) { | |
227 | |
228 if (_bedStream->good()) { | |
229 BED bed; | |
230 BedLineStatus bedStatus; | |
231 // force sorting; hence third param = true | |
232 while ((bedStatus = GetNextBed(bed, lineNum, true)) != BED_INVALID) { | |
233 if (bedStatus == BED_VALID) { | |
234 if (((int) bed.start - _merged_end > 0) || | |
235 (_merged_end < 0) || | |
236 (bed.chrom != _merged_chrom)) | |
237 { | |
238 if (_merged_start >= 0) { | |
239 merged_bed.chrom = _merged_chrom; | |
240 merged_bed.start = _merged_start; | |
241 merged_bed.end = _merged_end; | |
242 | |
243 _merged_chrom = bed.chrom; | |
244 _merged_start = bed.start; | |
245 _merged_end = bed.end; | |
246 | |
247 return true; | |
248 } | |
249 else { | |
250 _merged_start = bed.start; | |
251 _merged_chrom = bed.chrom; | |
252 _merged_end = bed.end; | |
253 } | |
254 } | |
255 else if ((int) bed.end > _merged_end) | |
256 { | |
257 _merged_end = bed.end; | |
258 } | |
259 } | |
260 } | |
261 // handle the last merged block in the file. | |
262 if (bedStatus == BED_INVALID) | |
263 { | |
264 merged_bed.chrom = _merged_chrom; | |
265 merged_bed.start = _merged_start; | |
266 merged_bed.end = _merged_end; | |
267 return true; | |
268 } | |
269 } | |
270 return false; | |
271 } | |
272 | |
273 | |
274 void BedFile::FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, | |
275 string strand, vector<BED> &hits, bool sameStrand, bool diffStrand) { | |
276 | |
277 BIN startBin, endBin; | |
278 startBin = (start >> _binFirstShift); | |
279 endBin = ((end-1) >> _binFirstShift); | |
280 | |
281 // loop through each bin "level" in the binning hierarchy | |
282 for (BINLEVEL i = 0; i < _binLevels; ++i) { | |
283 | |
284 // loop through each bin at this level of the hierarchy | |
285 BIN offset = _binOffsetsExtended[i]; | |
286 for (BIN j = (startBin+offset); j <= (endBin+offset); ++j) { | |
287 | |
288 // loop through each feature in this chrom/bin and see if it overlaps | |
289 // with the feature that was passed in. if so, add the feature to | |
290 // the list of hits. | |
291 vector<BED>::const_iterator bedItr = bedMap[chrom][j].begin(); | |
292 vector<BED>::const_iterator bedEnd = bedMap[chrom][j].end(); | |
293 | |
294 for (; bedItr != bedEnd; ++bedItr) { | |
295 // do we have sufficient overlap? | |
296 if (overlaps(bedItr->start, bedItr->end, start, end) > 0) { | |
297 | |
298 bool strands_are_same = (strand == bedItr->strand); | |
299 | |
300 // test for necessary strandedness | |
301 if ( (sameStrand == false && diffStrand == false) | |
302 || | |
303 (sameStrand == true && strands_are_same == true) | |
304 || | |
305 (diffStrand == true && strands_are_same == false) | |
306 ) | |
307 { | |
308 hits.push_back(*bedItr); | |
309 } | |
310 } | |
311 } | |
312 } | |
313 startBin >>= _binNextShift; | |
314 endBin >>= _binNextShift; | |
315 } | |
316 } | |
317 | |
318 | |
319 bool BedFile::FindOneOrMoreOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, | |
320 bool sameStrand, bool diffStrand, float overlapFraction) { | |
321 | |
322 BIN startBin, endBin; | |
323 startBin = (start >> _binFirstShift); | |
324 endBin = ((end-1) >> _binFirstShift); | |
325 | |
326 CHRPOS aLength = (end - start); | |
327 | |
328 // loop through each bin "level" in the binning hierarchy | |
329 for (BINLEVEL i = 0; i < _binLevels; ++i) { | |
330 | |
331 // loop through each bin at this level of the hierarchy | |
332 BIN offset = _binOffsetsExtended[i]; | |
333 for (BIN j = (startBin+offset); j <= (endBin+offset); ++j) { | |
334 | |
335 // loop through each feature in this chrom/bin and see if it overlaps | |
336 // with the feature that was passed in. if so, add the feature to | |
337 // the list of hits. | |
338 vector<BED>::const_iterator bedItr = bedMap[chrom][j].begin(); | |
339 vector<BED>::const_iterator bedEnd = bedMap[chrom][j].end(); | |
340 for (; bedItr != bedEnd; ++bedItr) { | |
341 | |
342 CHRPOS s = max(start, bedItr->start); | |
343 CHRPOS e = min(end, bedItr->end); | |
344 // the number of overlapping bases b/w a and b | |
345 int overlapBases = (e - s); | |
346 | |
347 // do we have sufficient overlap? | |
348 if ( (float) overlapBases / (float) aLength >= overlapFraction) { | |
349 | |
350 bool strands_are_same = (strand == bedItr->strand); | |
351 // test for necessary strandedness | |
352 if ( (sameStrand == false && diffStrand == false) | |
353 || | |
354 (sameStrand == true && strands_are_same == true) | |
355 || | |
356 (diffStrand == true && strands_are_same == false) | |
357 ) | |
358 { | |
359 return true; | |
360 } | |
361 } | |
362 } | |
363 } | |
364 startBin >>= _binNextShift; | |
365 endBin >>= _binNextShift; | |
366 } | |
367 return false; | |
368 } | |
369 | |
370 | |
371 bool BedFile::FindOneOrMoreReciprocalOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, | |
372 bool sameStrand, bool diffStrand, float overlapFraction) { | |
373 | |
374 BIN startBin, endBin; | |
375 startBin = (start >> _binFirstShift); | |
376 endBin = ((end-1) >> _binFirstShift); | |
377 | |
378 CHRPOS aLength = (end - start); | |
379 | |
380 // loop through each bin "level" in the binning hierarchy | |
381 for (BINLEVEL i = 0; i < _binLevels; ++i) { | |
382 | |
383 // loop through each bin at this level of the hierarchy | |
384 BIN offset = _binOffsetsExtended[i]; | |
385 for (BIN j = (startBin+offset); j <= (endBin+offset); ++j) { | |
386 | |
387 // loop through each feature in this chrom/bin and see if it overlaps | |
388 // with the feature that was passed in. if so, add the feature to | |
389 // the list of hits. | |
390 vector<BED>::const_iterator bedItr = bedMap[chrom][j].begin(); | |
391 vector<BED>::const_iterator bedEnd = bedMap[chrom][j].end(); | |
392 for (; bedItr != bedEnd; ++bedItr) { | |
393 CHRPOS s = max(start, bedItr->start); | |
394 CHRPOS e = min(end, bedItr->end); | |
395 | |
396 // the number of overlapping bases b/w a and b | |
397 int overlapBases = (e - s); | |
398 | |
399 // do we have sufficient overlap? | |
400 if ( (float) overlapBases / (float) aLength >= overlapFraction) { | |
401 CHRPOS bLength = (bedItr->end - bedItr->start); | |
402 float bOverlap = ( (float) overlapBases / (float) bLength ); | |
403 bool strands_are_same = (strand == bedItr->strand); | |
404 | |
405 // test for sufficient reciprocal overlap and strandedness | |
406 if ( (bOverlap >= overlapFraction) && | |
407 ((sameStrand == false && diffStrand == false) | |
408 || | |
409 (sameStrand == true && strands_are_same == true) | |
410 || | |
411 (diffStrand == true && strands_are_same == false)) | |
412 ) | |
413 { | |
414 return true; | |
415 } | |
416 } | |
417 } | |
418 } | |
419 startBin >>= _binNextShift; | |
420 endBin >>= _binNextShift; | |
421 } | |
422 return false; | |
423 } | |
424 | |
425 | |
426 void BedFile::countHits(const BED &a, bool sameStrand, bool diffStrand, bool countsOnly) { | |
427 | |
428 BIN startBin, endBin; | |
429 startBin = (a.start >> _binFirstShift); | |
430 endBin = ((a.end-1) >> _binFirstShift); | |
431 | |
432 // loop through each bin "level" in the binning hierarchy | |
433 for (BINLEVEL i = 0; i < _binLevels; ++i) { | |
434 | |
435 // loop through each bin at this level of the hierarchy | |
436 BIN offset = _binOffsetsExtended[i]; | |
437 for (BIN j = (startBin+offset); j <= (endBin+offset); ++j) { | |
438 | |
439 // loop through each feature in this chrom/bin and see if it overlaps | |
440 // with the feature that was passed in. if so, add the feature to | |
441 // the list of hits. | |
442 vector<BEDCOV>::iterator bedItr = bedCovMap[a.chrom][j].begin(); | |
443 vector<BEDCOV>::iterator bedEnd = bedCovMap[a.chrom][j].end(); | |
444 for (; bedItr != bedEnd; ++bedItr) { | |
445 | |
446 bool strands_are_same = (a.strand == bedItr->strand); | |
447 // skip the hit if not on the same strand (and we care) | |
448 if ((sameStrand == true && strands_are_same == false) || | |
449 (diffStrand == true && strands_are_same == true) | |
450 ) | |
451 { | |
452 continue; | |
453 } | |
454 else if (overlaps(bedItr->start, bedItr->end, a.start, a.end) > 0) { | |
455 | |
456 bedItr->count++; | |
457 if (countsOnly == false) { | |
458 if (a.zeroLength == false) { | |
459 bedItr->depthMap[a.start+1].starts++; | |
460 bedItr->depthMap[a.end].ends++; | |
461 } | |
462 else { | |
463 // correct for the fact that we artificially expanded the zeroLength feature | |
464 bedItr->depthMap[a.start+2].starts++; | |
465 bedItr->depthMap[a.end-1].ends++; | |
466 } | |
467 | |
468 if (a.start < bedItr->minOverlapStart) { | |
469 bedItr->minOverlapStart = a.start; | |
470 } | |
471 } | |
472 } | |
473 } | |
474 } | |
475 startBin >>= _binNextShift; | |
476 endBin >>= _binNextShift; | |
477 } | |
478 } | |
479 | |
480 | |
481 void BedFile::countSplitHits(const vector<BED> &bedBlocks, bool sameStrand, bool diffStrand, bool countsOnly) { | |
482 | |
483 // set to track the distinct B features that had coverage. | |
484 // we'll update the counts of coverage for these features by one | |
485 // at the end of this function to avoid over-counting. | |
486 set< vector<BEDCOV>::iterator > validHits; | |
487 | |
488 vector<BED>::const_iterator blockItr = bedBlocks.begin(); | |
489 vector<BED>::const_iterator blockEnd = bedBlocks.end(); | |
490 for (; blockItr != blockEnd; ++blockItr) { | |
491 | |
492 BIN startBin, endBin; | |
493 startBin = (blockItr->start >> _binFirstShift); | |
494 endBin = ((blockItr->end-1) >> _binFirstShift); | |
495 | |
496 // loop through each bin "level" in the binning hierarchy | |
497 for (BINLEVEL i = 0; i < _binLevels; ++i) { | |
498 | |
499 // loop through each bin at this level of the hierarchy | |
500 BIN offset = _binOffsetsExtended[i]; | |
501 for (BIN j = (startBin+offset); j <= (endBin+offset); ++j) { | |
502 | |
503 // loop through each feature in this chrom/bin and see if it overlaps | |
504 // with the feature that was passed in. if so, add the feature to | |
505 // the list of hits. | |
506 vector<BEDCOV>::iterator bedItr = bedCovMap[blockItr->chrom][j].begin(); | |
507 vector<BEDCOV>::iterator bedEnd = bedCovMap[blockItr->chrom][j].end(); | |
508 for (; bedItr != bedEnd; ++bedItr) { | |
509 | |
510 bool strands_are_same = (blockItr->strand == bedItr->strand); | |
511 // skip the hit if not on the same strand (and we care) | |
512 if ((sameStrand == true && strands_are_same == false) || | |
513 (diffStrand == true && strands_are_same == true) | |
514 ) | |
515 { | |
516 continue; | |
517 } | |
518 else if (overlaps(bedItr->start, bedItr->end, blockItr->start, blockItr->end) > 0) { | |
519 if (countsOnly == false) { | |
520 if (blockItr->zeroLength == false) { | |
521 bedItr->depthMap[blockItr->start+1].starts++; | |
522 bedItr->depthMap[blockItr->end].ends++; | |
523 } | |
524 else { | |
525 // correct for the fact that we artificially expanded the zeroLength feature | |
526 bedItr->depthMap[blockItr->start+2].starts++; | |
527 bedItr->depthMap[blockItr->end-1].ends++; | |
528 } | |
529 } | |
530 | |
531 validHits.insert(bedItr); | |
532 if (blockItr->start < bedItr->minOverlapStart) | |
533 bedItr->minOverlapStart = blockItr->start; | |
534 } | |
535 } | |
536 } | |
537 startBin >>= _binNextShift; | |
538 endBin >>= _binNextShift; | |
539 } | |
540 } | |
541 // incrment the count of overlap by one for each B feature that overlapped | |
542 // the current passed hit. This is necessary to prevent over-counting for | |
543 // each "split"" of a single read. | |
544 set< vector<BEDCOV>::iterator >::iterator validHitsItr = validHits.begin(); | |
545 set< vector<BEDCOV>::iterator >::iterator validHitsEnd = validHits.end(); | |
546 for (; validHitsItr != validHitsEnd; ++validHitsItr) | |
547 // the validHitsItr points to another itr, hence the (*itr)-> dereferencing. | |
548 // ugly, but that's C++. | |
549 (*validHitsItr)->count++; | |
550 } | |
551 | |
552 | |
553 void BedFile::countListHits(const BED &a, int index, bool sameStrand, bool diffStrand) { | |
554 | |
555 BIN startBin, endBin; | |
556 startBin = (a.start >> _binFirstShift); | |
557 endBin = ((a.end-1) >> _binFirstShift); | |
558 | |
559 // loop through each bin "level" in the binning hierarchy | |
560 for (BINLEVEL i = 0; i < _binLevels; ++i) { | |
561 | |
562 // loop through each bin at this level of the hierarchy | |
563 BIN offset = _binOffsetsExtended[i]; | |
564 for (BIN j = (startBin+offset); j <= (endBin+offset); ++j) { | |
565 | |
566 // loop through each feature in this chrom/bin and see if it overlaps | |
567 // with the feature that was passed in. if so, add the feature to | |
568 // the list of hits. | |
569 vector<BEDCOVLIST>::iterator bedItr = bedCovListMap[a.chrom][j].begin(); | |
570 vector<BEDCOVLIST>::iterator bedEnd = bedCovListMap[a.chrom][j].end(); | |
571 for (; bedItr != bedEnd; ++bedItr) { | |
572 | |
573 bool strands_are_same = (a.strand == bedItr->strand); | |
574 // skip the hit if not on the same strand (and we care) | |
575 if ((sameStrand == true && strands_are_same == false) || | |
576 (diffStrand == true && strands_are_same == true) | |
577 ) | |
578 { | |
579 continue; | |
580 } | |
581 else if (overlaps(bedItr->start, bedItr->end, a.start, a.end) > 0) { | |
582 bedItr->counts[index]++; | |
583 if (a.zeroLength == false) { | |
584 bedItr->depthMapList[index][a.start+1].starts++; | |
585 bedItr->depthMapList[index][a.end].ends++; | |
586 } | |
587 else { | |
588 // correct for the fact that we artificially expanded the zeroLength feature | |
589 bedItr->depthMapList[index][a.start+2].starts++; | |
590 bedItr->depthMapList[index][a.end-1].ends++; | |
591 } | |
592 | |
593 if (a.start < bedItr->minOverlapStarts[index]) { | |
594 bedItr->minOverlapStarts[index] = a.start; | |
595 } | |
596 } | |
597 } | |
598 } | |
599 startBin >>= _binNextShift; | |
600 endBin >>= _binNextShift; | |
601 } | |
602 } | |
603 | |
604 void BedFile::setZeroBased(bool zeroBased) { this->isZeroBased = zeroBased; } | |
605 | |
606 void BedFile::setGff (bool gff) { this->_isGff = gff; } | |
607 | |
608 | |
609 void BedFile::setVcf (bool vcf) { this->_isVcf = vcf; } | |
610 | |
611 | |
612 void BedFile::setFileType (FileType type) { | |
613 _fileType = type; | |
614 _typeIsKnown = true; | |
615 } | |
616 | |
617 | |
618 void BedFile::setBedType (int colNums) { | |
619 bedType = colNums; | |
620 } | |
621 | |
622 | |
623 void BedFile::loadBedFileIntoMap() { | |
624 | |
625 BED bedEntry, nullBed; | |
626 int lineNum = 0; | |
627 BedLineStatus bedStatus; | |
628 | |
629 Open(); | |
630 while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) { | |
631 if (bedStatus == BED_VALID) { | |
632 BIN bin = getBin(bedEntry.start, bedEntry.end); | |
633 bedMap[bedEntry.chrom][bin].push_back(bedEntry); | |
634 bedEntry = nullBed; | |
635 } | |
636 } | |
637 Close(); | |
638 } | |
639 | |
640 | |
641 void BedFile::loadBedCovFileIntoMap() { | |
642 | |
643 BED bedEntry, nullBed; | |
644 int lineNum = 0; | |
645 BedLineStatus bedStatus; | |
646 | |
647 Open(); | |
648 while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) { | |
649 if (bedStatus == BED_VALID) { | |
650 BIN bin = getBin(bedEntry.start, bedEntry.end); | |
651 | |
652 BEDCOV bedCov; | |
653 bedCov.chrom = bedEntry.chrom; | |
654 bedCov.start = bedEntry.start; | |
655 bedCov.end = bedEntry.end; | |
656 bedCov.name = bedEntry.name; | |
657 bedCov.score = bedEntry.score; | |
658 bedCov.strand = bedEntry.strand; | |
659 bedCov.otherFields = bedEntry.otherFields; | |
660 bedCov.zeroLength = bedEntry.zeroLength; | |
661 bedCov.count = 0; | |
662 bedCov.minOverlapStart = INT_MAX; | |
663 | |
664 bedCovMap[bedEntry.chrom][bin].push_back(bedCov); | |
665 bedEntry = nullBed; | |
666 } | |
667 } | |
668 Close(); | |
669 } | |
670 | |
671 void BedFile::loadBedCovListFileIntoMap() { | |
672 | |
673 BED bedEntry, nullBed; | |
674 int lineNum = 0; | |
675 BedLineStatus bedStatus; | |
676 | |
677 Open(); | |
678 while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) { | |
679 if (bedStatus == BED_VALID) { | |
680 BIN bin = getBin(bedEntry.start, bedEntry.end); | |
681 | |
682 BEDCOVLIST bedCovList; | |
683 bedCovList.chrom = bedEntry.chrom; | |
684 bedCovList.start = bedEntry.start; | |
685 bedCovList.end = bedEntry.end; | |
686 bedCovList.name = bedEntry.name; | |
687 bedCovList.score = bedEntry.score; | |
688 bedCovList.strand = bedEntry.strand; | |
689 bedCovList.otherFields = bedEntry.otherFields; | |
690 bedCovList.zeroLength = bedEntry.zeroLength; | |
691 | |
692 bedCovListMap[bedEntry.chrom][bin].push_back(bedCovList); | |
693 bedEntry = nullBed; | |
694 } | |
695 } | |
696 Close(); | |
697 } | |
698 | |
699 | |
700 void BedFile::loadBedFileIntoMapNoBin() { | |
701 | |
702 BED bedEntry, nullBed; | |
703 int lineNum = 0; | |
704 BedLineStatus bedStatus; | |
705 | |
706 Open(); | |
707 while ((bedStatus = this->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { | |
708 if (bedStatus == BED_VALID) { | |
709 bedMapNoBin[bedEntry.chrom].push_back(bedEntry); | |
710 bedEntry = nullBed; | |
711 } | |
712 } | |
713 Close(); | |
714 | |
715 // sort the BED entries for each chromosome | |
716 // in ascending order of start position | |
717 for (masterBedMapNoBin::iterator m = this->bedMapNoBin.begin(); m != this->bedMapNoBin.end(); ++m) { | |
718 sort(m->second.begin(), m->second.end(), sortByStart); | |
719 } | |
720 } |