comparison BEDTools-Version-2.14.3/src/subtractBed/subtractBed.cpp @ 1:bec36315bd12 default tip

Deleted selected files
author aaronquinlan
date Sat, 19 Nov 2011 14:17:03 -0500
parents dfcd8b6c1bda
children
comparison
equal deleted inserted replaced
0:dfcd8b6c1bda 1:bec36315bd12
1 /*****************************************************************************
2 subtractBed.cpp
3
4 (c) 2009 - Aaron Quinlan
5 Hall Laboratory
6 Department of Biochemistry and Molecular Genetics
7 University of Virginia
8 aaronquinlan@gmail.com
9
10 Licenced under the GNU General Public License 2.0 license.
11 ******************************************************************************/
12 #include "lineFileUtilities.h"
13 #include "subtractBed.h"
14
15
16 /*
17 Constructor
18 */
19 BedSubtract::BedSubtract(string &bedAFile, string &bedBFile, float overlapFraction, bool sameStrand, bool diffStrand) {
20
21 _bedAFile = bedAFile;
22 _bedBFile = bedBFile;
23 _overlapFraction = overlapFraction;
24 _sameStrand = sameStrand;
25 _diffStrand = diffStrand;
26
27 _bedA = new BedFile(bedAFile);
28 _bedB = new BedFile(bedBFile);
29
30 SubtractBed();
31 }
32
33
34 /*
35 Destructor
36 */
37 BedSubtract::~BedSubtract(void) {
38 }
39
40
41 void BedSubtract::FindAndSubtractOverlaps(BED &a, vector<BED> &hits) {
42
43 // find all of the overlaps between a and B.
44 _bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
45
46 // is A completely spanned by an entry in B?
47 // if so, A should not be reported.
48 int numConsumedByB = 0;
49 int numOverlaps = 0;
50 vector<BED> bOverlaps; // list of hits in B. Special processing if there are multiple.
51
52 vector<BED>::const_iterator h = hits.begin();
53 vector<BED>::const_iterator hitsEnd = hits.end();
54 for (; h != hitsEnd; ++h) {
55
56 int s = max(a.start, h->start);
57 int e = min(a.end, h->end);
58 int overlapBases = (e - s); // the number of overlapping bases b/w a and b
59 int aLength = (a.end - a.start); // the length of a in b.p.
60
61 if (s < e) {
62
63 // is there enough overlap (default ~ 1bp)
64 float overlap = ((float) overlapBases / (float) aLength);
65
66 if (overlap >= 1.0) {
67 numOverlaps++;
68 numConsumedByB++;
69 }
70 else if ( overlap >= _overlapFraction ) {
71 numOverlaps++;
72 bOverlaps.push_back(*h);
73 }
74 }
75 }
76
77 if (numOverlaps == 0) {
78 // no overlap found, so just report A as-is.
79 _bedA->reportBedNewLine(a);
80 }
81 else if (numOverlaps == 1) {
82 // one overlap found. only need to look at the single
83 // entry in bOverlaps.
84
85 // if A was not "consumed" by any entry in B
86 if (numConsumedByB == 0) {
87
88 BED theHit = bOverlaps[0];
89
90 // A ++++++++++++
91 // B ----
92 // Res. ==== ====
93 if ( (theHit.start > a.start) && (theHit.end < a.end) ) {
94 _bedA->reportBedRangeNewLine(a,a.start,theHit.start);
95 _bedA->reportBedRangeNewLine(a,theHit.end,a.end);
96 }
97 // A ++++++++++++
98 // B ----------
99 // Res. ==
100 else if (theHit.start == a.start) {
101 _bedA->reportBedRangeNewLine(a,theHit.end,a.end);
102 }
103 // A ++++++++++++
104 // B ----------
105 // Res. ====
106 else if (theHit.start < a.start) {
107 _bedA->reportBedRangeNewLine(a,theHit.end,a.end);
108 }
109 // A ++++++++++++
110 // B ----------
111 // Res. =======
112 else if (theHit.start > a.start) {
113 _bedA->reportBedRangeNewLine(a,a.start,theHit.start);
114 }
115 }
116 }
117 else if (numOverlaps > 1) {
118 // multiple overlapz found. look at all the hits
119 // and figure out which bases in A survived. then
120 // report the contigous intervals that survived.
121
122 vector<bool> aKeep(a.end - a.start, true);
123
124 if (numConsumedByB == 0) {
125 // track the number of hit starts and ends at each position in A
126 for (vector<BED>::iterator h = bOverlaps.begin(); h != bOverlaps.end(); ++h) {
127 int s = max(a.start, h->start);
128 int e = min(a.end, h->end);
129
130 for (int i = s+1; i <= e; ++i) {
131 aKeep[i-a.start-1] = false;
132 }
133 }
134 // report the remaining blocks.
135 for (unsigned int i = 0; i < aKeep.size(); ++i) {
136 if (aKeep[i] == true) {
137 CHRPOS blockStart = i + a.start;
138 while ((aKeep[i] == true) && (i < aKeep.size())) {
139 i++;
140 }
141 CHRPOS blockEnd = i + a.start;
142 blockEnd = min(a.end, blockEnd);
143 _bedA->reportBedRangeNewLine(a,blockStart,blockEnd);
144 }
145 }
146 }
147 }
148 }
149
150
151
152 void BedSubtract::SubtractBed() {
153
154 // load the "B" bed file into a map so
155 // that we can easily compare "A" to it for overlaps
156 _bedB->loadBedFileIntoMap();
157
158 BED a, nullBed;
159 BedLineStatus bedStatus;
160 int lineNum = 0; // current input line number
161 vector<BED> hits; // vector of potential hits
162 // reserve some space
163 hits.reserve(100);
164
165 _bedA->Open();
166 while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {
167 if (bedStatus == BED_VALID) {
168 FindAndSubtractOverlaps(a, hits);
169 hits.clear();
170 a = nullBed;
171 }
172 }
173 _bedA->Close();
174
175 }
176 // END Intersect
177
178