annotate BEDTools-Version-2.14.3/src/fjoin/fjoin.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 intersectBed.cpp
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2009 - Aaron Quinlan
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 Licenced under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 #include "lineFileUtilities.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #include "fjoin.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14 #include <queue>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15 #include <set>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 bool leftOf(const BED &a, const BED &b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 bool BedIntersect::processHits(BED &a, vector<BED> &hits) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 // how many overlaps are there b/w the bed and the set of hits?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22 int s, e, overlapBases;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 int numOverlaps = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 bool hitsFound = false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25 int aLength = (a.end - a.start); // the length of a in b.p.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 // loop through the hits and report those that meet the user's criteria
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 vector<BED>::const_iterator h = hits.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29 vector<BED>::const_iterator hitsEnd = hits.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30 for (; h != hitsEnd; ++h) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 s = max(a.start, h->start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 e = min(a.end, h->end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33 overlapBases = (e - s); // the number of overlapping bases b/w a and b
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35 // is there enough overlap relative to the user's request? (default ~ 1bp)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36 if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37 // Report the hit if the user doesn't care about reciprocal overlap between A and B.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38 if (_reciprocal == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39 hitsFound = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40 numOverlaps++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41 if (_printable == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42 ReportOverlapDetail(overlapBases, a, *h, s, e);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44 // we require there to be sufficient __reciprocal__ overlap
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46 int bLength = (h->end - h->start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47 float bOverlap = ( (float) overlapBases / (float) bLength );
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48 if (bOverlap >= _overlapFraction) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49 hitsFound = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50 numOverlaps++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51 if (_printable == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52 ReportOverlapDetail(overlapBases, a, *h, s, e);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57 // report the summary of the overlaps if requested.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58 ReportOverlapSummary(a, numOverlaps);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59 // were hits found for this BED feature?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60 return hitsFound;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64 Constructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66 BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67 bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68 float overlapFraction, bool noHit, bool writeCount, bool forceStrand,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69 bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71 _bedAFile = bedAFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72 _bedBFile = bedBFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73 _anyHit = anyHit;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74 _noHit = noHit;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75 _writeA = writeA;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76 _writeB = writeB;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77 _writeOverlap = writeOverlap;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78 _writeAllOverlap = writeAllOverlap;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79 _writeCount = writeCount;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80 _overlapFraction = overlapFraction;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81 _forceStrand = forceStrand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 _reciprocal = reciprocal;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83 _obeySplits = obeySplits;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 _bamInput = bamInput;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85 _bamOutput = bamOutput;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 if (_anyHit || _noHit || _writeCount)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88 _printable = false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90 _printable = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92 // create new BED file objects for A and B
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 _bedA = new BedFile(bedAFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 _bedB = new BedFile(bedBFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96 IntersectBed();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 Destructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 BedIntersect::~BedIntersect(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 bool leftOf(const BED &a, const BED &b) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108 return (a.end <= b.start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112 void BedIntersect::ReportOverlapDetail(const int &overlapBases, const BED &a, const BED &b,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113 const CHRPOS &s, const CHRPOS &e) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 // default. simple intersection only
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115 if (_writeA == false && _writeB == false && _writeOverlap == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116 _bedA->reportBedRangeNewLine(a,s,e);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118 // -wa -wbwrite the original A and B
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119 else if (_writeA == true && _writeB == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120 _bedA->reportBedTab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121 _bedB->reportBedNewLine(b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123 // -wa write just the original A
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124 else if (_writeA == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125 _bedA->reportBedNewLine(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
127 // -wb write the intersected portion of A and the original B
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
128 else if (_writeB == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
129 _bedA->reportBedRangeTab(a,s,e);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
130 _bedB->reportBedNewLine(b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
131 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
132 // -wo write the original A and B plus the no. of overlapping bases.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
133 else if (_writeOverlap == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
134 _bedA->reportBedTab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
135 _bedB->reportBedTab(b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
136 printf("%d\n", overlapBases);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
137 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
138 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
139
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
140
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
141 void BedIntersect::ReportOverlapSummary(const BED &a, const int &numOverlapsFound) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
142 // -u just report the fact that there was >= 1 overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
143 if (_anyHit && (numOverlapsFound >= 1)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
144 _bedA->reportBedNewLine(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
145 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
146 // -c report the total number of features overlapped in B
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
147 else if (_writeCount) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
148 _bedA->reportBedTab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
149 printf("%d\n", numOverlapsFound);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
150 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
151 // -v report iff there were no overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
152 else if (_noHit && (numOverlapsFound == 0)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
153 _bedA->reportBedNewLine(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
154 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
155 // -wao the user wants to force the reporting of 0 overlap
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
156 else if (_writeAllOverlap && (numOverlapsFound == 0)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
157 _bedA->reportBedTab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
158 _bedB->reportNullBedTab();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
159 printf("0\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
160 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
161 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
162
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
163
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
164
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
165 void BedIntersect::Scan(BED *x, vector<BED *> *windowX, BedLineStatus xStatus,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
166 const BED &y, vector<BED *> *windowY, BedLineStatus yStatus) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
167
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
168 if (xStatus != BED_VALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
169 return;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
170 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
171
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
172 std::vector<BED *>::iterator wYIter = windowY->begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
173 while (wYIter != windowY->end()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
174 if (leftOf(*(*wYIter), *x) == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
175 (*wYIter)->finished = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
176 wYIter = windowY->erase(wYIter); // erase auto-increments to the next position
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
177 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
178 else if (overlaps((*wYIter)->start, (*wYIter)->end, x->start, x->end) > 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
179 if (_lastPick == 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
180 AddHits(x, *(*wYIter));
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
181 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
182 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
183 AddHits(*wYIter, *x);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
184 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
185 ++wYIter; // force incrementing
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
186 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
187 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
188 if (leftOf(*x,y) == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
189 windowX->push_back(x);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
190 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
191 x->finished = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
192 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
193 // dump the buffered results (if any)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
194 FlushOutputBuffer();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
195 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
196
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
197
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
198 void BedIntersect::AddHits(BED *x, const BED &y) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
199 if (x->overlaps.empty() == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
200 _outputBuffer.push(x);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
201 x->overlaps.push_back(y);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
202 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
203
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
204
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
205 void BedIntersect::FlushOutputBuffer(bool final) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
206 while (_outputBuffer.empty() == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
207 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
208 if (final == false && _outputBuffer.front()->finished == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
209 break;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
210
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
211 processHits(*_outputBuffer.front(), _outputBuffer.front()->overlaps);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
212 // remove the finished BED entry from the heap
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
213 delete _outputBuffer.front();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
214 _outputBuffer.pop();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
215 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
216 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
217
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
218
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
219 vector<BED*>* BedIntersect::GetWindow(const string &chrom, bool isA) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
220
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
221 // iterator to test if a window for a given chrom exists.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
222 map<string, vector<BED*> >::iterator it;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
223
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
224 // grab the current window for A or B, depending on
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
225 // the request. if a window hasn't yet been created
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
226 // for the requested chrom, create one.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
227
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
228 if (isA) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
229 it = _windowA.find(chrom);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
230 if (it != _windowA.end()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
231 return & _windowA[chrom];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
232 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
233 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
234 _windowA.insert(pair<string, vector<BED *> >(chrom, vector<BED *>()));
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
235 return & _windowA[chrom];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
236 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
237 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
238 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
239 it = _windowB.find(chrom);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
240 if (it != _windowB.end()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
241 return & _windowB[chrom];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
242 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
243 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
244 _windowB.insert(pair<string, vector<BED *> >(chrom, vector<BED *>()));
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
245 return & _windowB[chrom];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
246 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
247 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
248 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
249
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
250
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
251 void BedIntersect::ChromSwitch(const string &chrom) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
252
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
253 vector<BED*>::iterator windowAIter = _windowA[chrom].begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
254 vector<BED*>::iterator windowAEnd = _windowA[chrom].end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
255 for (; windowAIter != windowAEnd; ++windowAIter)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
256 (*windowAIter)->finished = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
257
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
258 vector<BED*>::iterator windowBIter = _windowB[chrom].begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
259 vector<BED*>::iterator windowBEnd = _windowB[chrom].end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
260 for (; windowBIter != windowBEnd; ++windowBIter)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
261 (*windowBIter)->finished = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
262
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
263 FlushOutputBuffer();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
264 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
265
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
266
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
267 void BedIntersect::IntersectBed() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
268
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
269 int aLineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
270 int bLineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
271
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
272 // current feature from each file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
273 BED *a, *b, *prevA, *prevB;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
274
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
275 // status of the current lines
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
276 BedLineStatus aStatus, bStatus;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
277
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
278 // open the files; get the first line from each
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
279 _bedA->Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
280 _bedB->Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
281
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
282 prevA = NULL;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
283 prevB = NULL;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
284 a = new BED();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
285 b = new BED();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
286 aStatus = _bedA->GetNextBed(*a, aLineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
287 bStatus = _bedB->GetNextBed(*b, bLineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
288
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
289 cout << a->chrom << " " << a->start << " " << a->chrom << " " << b->start << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
290 while (aStatus != BED_INVALID || bStatus != BED_INVALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
291
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
292 if ((a->start <= b->start) && (a->chrom == b->chrom)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
293 prevA = a;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
294 _lastPick = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
295 Scan(a, GetWindow(a->chrom, true), aStatus,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
296 *b, GetWindow(a->chrom, false), bStatus);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
297
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
298 a = new BED();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
299 aStatus = _bedA->GetNextBed(*a, aLineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
300 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
301 else if ((a->start > b->start) && (a->chrom == b->chrom)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
302 prevB = b;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
303 _lastPick = 1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
304 Scan(b, GetWindow(b->chrom, false), bStatus,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
305 *a, GetWindow(b->chrom, true), aStatus);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
306
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
307 b = new BED();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
308 bStatus = _bedB->GetNextBed(*b, bLineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
309 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
310 else if (a->chrom != b->chrom) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
311 // A was most recently read
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
312 if (_lastPick == 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
313 prevB = b;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
314 while (b->chrom == prevA->chrom){
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
315 _windowB[prevA->chrom].push_back(b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
316 b = new BED();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
317 bStatus = _bedB->GetNextBed(*b, bLineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
318 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
319 Scan(prevA, GetWindow(prevA->chrom, true), aStatus,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
320 *prevB, GetWindow(prevA->chrom, false), bStatus);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
321 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
322 // B was most recently read
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
323 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
324 prevA = a;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
325 while (a->chrom == prevB->chrom) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
326 _windowA[prevB->chrom].push_back(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
327 a = new BED();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
328 aStatus = _bedA->GetNextBed(*a, aLineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
329 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
330 Scan(prevB, GetWindow(prevB->chrom, false), bStatus,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
331 *prevA, GetWindow(prevB->chrom, true), aStatus);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
332 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
333 FlushOutputBuffer(true);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
334 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
335 if (prevA!=NULL&&prevB!=NULL)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
336 //cout << prevA->chrom << " " << a->chrom << " " << a->start << " "
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
337 // << prevB->chrom << " " << b->chrom << " " << b->start << "\n";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
338 if (aStatus == BED_INVALID) a->start = INT_MAX;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
339 if (bStatus == BED_INVALID) b->start = INT_MAX;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
340 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
341
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
342 // clear out the final bit of staged output
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
343 FlushOutputBuffer(true);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
344
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
345 // close the files
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
346 _bedA->Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
347 _bedB->Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
348 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
349
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
350