annotate BEDTools-Version-2.14.3/src/intersectBed/intersectBed.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 intersectBed.cpp
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2009 - Aaron Quinlan
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 Licenced under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 #include "lineFileUtilities.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #include "intersectBed.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15 /************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16 Helper functions
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 ************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 bool BedIntersect::processHits(const BED &a, const vector<BED> &hits) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 // how many overlaps are there b/w the bed and the set of hits?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 CHRPOS s, e;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22 int overlapBases;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 int numOverlaps = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 bool hitsFound = false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25 int aLength = (a.end - a.start); // the length of a in b.p.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 // loop through the hits and report those that meet the user's criteria
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 vector<BED>::const_iterator h = hits.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29 vector<BED>::const_iterator hitsEnd = hits.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30 for (; h != hitsEnd; ++h) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 s = max(a.start, h->start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 e = min(a.end, h->end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33 overlapBases = (e - s); // the number of overlapping bases b/w a and b
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35 // is there enough overlap relative to the user's request? (default ~ 1bp)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36 if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37 // Report the hit if the user doesn't care about reciprocal overlap between A and B.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38 if (_reciprocal == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39 hitsFound = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40 numOverlaps++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41 if (_printable == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42 ReportOverlapDetail(overlapBases, a, *h, s, e);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44 // we require there to be sufficient __reciprocal__ overlap
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46 int bLength = (h->end - h->start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47 float bOverlap = ( (float) overlapBases / (float) bLength );
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48 if (bOverlap >= _overlapFraction) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49 hitsFound = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50 numOverlaps++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51 if (_printable == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52 ReportOverlapDetail(overlapBases, a, *h, s, e);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57 // report the summary of the overlaps if requested.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58 ReportOverlapSummary(a, numOverlaps);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59 // were hits found for this BED feature?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60 return hitsFound;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65 Constructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67 BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68 bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69 float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70 bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71 bool sortedInput) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73 _bedAFile = bedAFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74 _bedBFile = bedBFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75 _anyHit = anyHit;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76 _noHit = noHit;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77 _writeA = writeA;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78 _writeB = writeB;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79 _writeOverlap = writeOverlap;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80 _writeAllOverlap = writeAllOverlap;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81 _writeCount = writeCount;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 _overlapFraction = overlapFraction;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83 _sameStrand = sameStrand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 _diffStrand = diffStrand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85 _reciprocal = reciprocal;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86 _obeySplits = obeySplits;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 _bamInput = bamInput;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88 _bamOutput = bamOutput;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89 _isUncompressedBam = isUncompressedBam;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90 _sortedInput = sortedInput;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92 // should we print each overlap, or does the user want summary information?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 _printable = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 if (_anyHit || _noHit || _writeCount)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95 _printable = false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97 if (_bamInput == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98 IntersectBed();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100 IntersectBam(bedAFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105 Destructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 BedIntersect::~BedIntersect(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111 bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112 bool hitsFound = false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 // collect and report the sufficient hits
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115 _bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116 hitsFound = processHits(a, hits);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 return hitsFound;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121 void BedIntersect::ReportOverlapDetail(int overlapBases, const BED &a, const BED &b, CHRPOS s, CHRPOS e) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122 // default. simple intersection only
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123 if (_writeA == false && _writeB == false && _writeOverlap == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124 _bedA->reportBedRangeNewLine(a,s,e);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126 // -wa -wb write the original A and B
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
127 else if (_writeA == true && _writeB == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
128 _bedA->reportBedTab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
129 _bedB->reportBedNewLine(b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
130 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
131 // -wa write just the original A
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
132 else if (_writeA == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
133 _bedA->reportBedNewLine(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
134 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
135 // -wb write the intersected portion of A and the original B
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
136 else if (_writeB == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
137 _bedA->reportBedRangeTab(a,s,e);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
138 _bedB->reportBedNewLine(b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
139 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
140 // -wo write the original A and B plus the no. of overlapping bases.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
141 else if (_writeOverlap == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
142 _bedA->reportBedTab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
143 _bedB->reportBedTab(b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
144 if (b.zeroLength == false) printf("%d\n", overlapBases);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
145 else printf("0\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
146 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
147 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
148
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
149
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
150 void BedIntersect::ReportOverlapSummary(const BED &a, const int &numOverlapsFound) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
151 // -u just report the fact that there was >= 1 overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
152 if (_anyHit && (numOverlapsFound >= 1)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
153 _bedA->reportBedNewLine(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
154 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
155 // -c report the total number of features overlapped in B
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
156 else if (_writeCount) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
157 _bedA->reportBedTab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
158 printf("%d\n", numOverlapsFound);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
159 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
160 // -v report iff there were no overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
161 else if (_noHit && (numOverlapsFound == 0)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
162 _bedA->reportBedNewLine(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
163 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
164 // -wao the user wants to force the reporting of 0 overlap
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
165 else if (_writeAllOverlap && (numOverlapsFound == 0)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
166 _bedA->reportBedTab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
167 _bedB->reportNullBedTab();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
168 printf("0\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
169 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
170 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
171
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
172
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
173 bool BedIntersect::FindOneOrMoreOverlap(const BED &a) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
174 bool overlapsFound = false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
175 if (_reciprocal == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
176 overlapsFound = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
177 _sameStrand, _diffStrand, _overlapFraction);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
178 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
179 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
180 overlapsFound = _bedB->FindOneOrMoreReciprocalOverlapsPerBin(a.chrom, a.start, a.end, a.strand,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
181 _sameStrand, _diffStrand, _overlapFraction);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
182 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
183 return overlapsFound;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
184 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
185
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
186
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
187 void BedIntersect::IntersectBed() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
188
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
189 // create new BED file objects for A and B
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
190 _bedA = new BedFile(_bedAFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
191 _bedB = new BedFile(_bedBFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
192
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
193 if (_sortedInput == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
194 // load the "B" file into a map in order to
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
195 // compare each entry in A to it in search of overlaps.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
196 _bedB->loadBedFileIntoMap();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
197
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
198 int lineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
199 vector<BED> hits;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
200 hits.reserve(100);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
201 BED a, nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
202 BedLineStatus bedStatus;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
203
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
204 // open the "A" file, process each BED entry and searh for overlaps.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
205 _bedA->Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
206 while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
207 if (bedStatus == BED_VALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
208 // treat the BED as a single "block"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
209 if (_obeySplits == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
210 FindOverlaps(a, hits);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
211 hits.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
212 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
213 // split the BED12 into blocks and look for overlaps in each discrete block
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
214 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
215 bedVector bedBlocks; // vec to store the discrete BED "blocks"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
216 splitBedIntoBlocks(a, lineNum, bedBlocks);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
217
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
218 vector<BED>::const_iterator bedItr = bedBlocks.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
219 vector<BED>::const_iterator bedEnd = bedBlocks.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
220 for (; bedItr != bedEnd; ++bedItr) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
221 FindOverlaps(*bedItr, hits);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
222 hits.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
223 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
224 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
225 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
226 a = nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
227 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
228 _bedA->Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
229 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
230 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
231 // use the chromsweep algorithm to detect overlaps on the fly.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
232 ChromSweep sweep = ChromSweep(_bedA, _bedB, _sameStrand, _diffStrand);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
233
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
234 pair<BED, vector<BED> > hit_set;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
235 hit_set.second.reserve(10000);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
236 while (sweep.Next(hit_set)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
237 processHits(hit_set.first, hit_set.second);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
238 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
239 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
240 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
241
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
242
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
243 void BedIntersect::IntersectBam(string bamFile) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
244
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
245 // load the "B" bed file into a map so
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
246 // that we can easily compare "A" to it for overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
247 _bedB = new BedFile(_bedBFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
248 _bedB->loadBedFileIntoMap();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
249
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
250 // create a dummy BED A file for printing purposes if not
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
251 // using BAM output.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
252 if (_bamOutput == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
253 _bedA = new BedFile(_bedAFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
254 _bedA->bedType = 6;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
255 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
256
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
257 // open the BAM file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
258 BamReader reader;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
259 BamWriter writer;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
260
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
261 reader.Open(bamFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
262
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
263 // get header & reference information
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
264 string bamHeader = reader.GetHeaderText();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
265 RefVector refs = reader.GetReferenceData();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
266
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
267 // open a BAM output to stdout if we are writing BAM
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
268 if (_bamOutput == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
269 // set compression mode
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
270 BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
271 if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
272 writer.SetCompressionMode(compressionMode);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
273 // open our BAM writer
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
274 writer.Open("stdout", bamHeader, refs);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
275 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
276
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
277 vector<BED> hits;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
278 // reserve some space
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
279 hits.reserve(100);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
280
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
281
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
282 BamAlignment bam;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
283 // get each set of alignments for each pair.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
284 while (reader.GetNextAlignment(bam)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
285
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
286 if (bam.IsMapped()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
287 BED a;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
288 a.chrom = refs.at(bam.RefID).RefName;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
289 a.start = bam.Position;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
290 a.end = bam.GetEndPosition(false, false);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
291
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
292 // build the name field from the BAM alignment.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
293 a.name = bam.Name;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
294 if (bam.IsFirstMate()) a.name += "/1";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
295 if (bam.IsSecondMate()) a.name += "/2";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
296
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
297 a.score = ToString(bam.MapQuality);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
298
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
299 a.strand = "+";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
300 if (bam.IsReverseStrand()) a.strand = "-";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
301
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
302 if (_bamOutput == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
303 bool overlapsFound = false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
304 // treat the BAM alignment as a single "block"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
305 if (_obeySplits == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
306 overlapsFound = FindOneOrMoreOverlap(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
307 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
308 // split the BAM alignment into discrete blocks and
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
309 // look for overlaps only within each block.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
310 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
311 bool overlapFoundForBlock;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
312 bedVector bedBlocks; // vec to store the discrete BED "blocks" from a
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
313 // we don't want to split on "D" ops, hence the "false"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
314 getBamBlocks(bam, refs, bedBlocks, false);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
315
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
316 vector<BED>::const_iterator bedItr = bedBlocks.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
317 vector<BED>::const_iterator bedEnd = bedBlocks.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
318 for (; bedItr != bedEnd; ++bedItr) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
319 overlapFoundForBlock = FindOneOrMoreOverlap(*bedItr);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
320 if (overlapFoundForBlock == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
321 overlapsFound = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
322 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
323 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
324 if (overlapsFound == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
325 if (_noHit == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
326 writer.SaveAlignment(bam);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
327 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
328 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
329 if (_noHit == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
330 writer.SaveAlignment(bam);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
331 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
332 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
333 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
334 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
335 // treat the BAM alignment as a single BED "block"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
336 if (_obeySplits == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
337 FindOverlaps(a, hits);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
338 hits.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
339 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
340 // split the BAM alignment into discrete BED blocks and
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
341 // look for overlaps only within each block.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
342 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
343 bedVector bedBlocks; // vec to store the discrete BED "blocks" from a
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
344 getBamBlocks(bam, refs, bedBlocks, false);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
345
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
346 vector<BED>::const_iterator bedItr = bedBlocks.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
347 vector<BED>::const_iterator bedEnd = bedBlocks.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
348 for (; bedItr != bedEnd; ++bedItr) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
349 FindOverlaps(*bedItr, hits);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
350 hits.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
351 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
352 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
353 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
354 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
355 // BAM IsMapped() is false
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
356 else if (_noHit == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
357 writer.SaveAlignment(bam);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
358 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
359 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
360
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
361 // close the relevant BAM files.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
362 reader.Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
363 if (_bamOutput == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
364 writer.Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
365 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
366 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
367