comparison BEDTools-Version-2.14.3/src/windowBed/windowBed.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfcd8b6c1bda
1 /*****************************************************************************
2 windowBed.cpp
3
4 (c) 2009 - Aaron Quinlan
5 Hall Laboratory
6 Department of Biochemistry and Molecular Genetics
7 University of Virginia
8 aaronquinlan@gmail.com
9
10 Licenced under the GNU General Public License 2.0 license.
11 ******************************************************************************/
12 #include "lineFileUtilities.h"
13 #include "windowBed.h"
14
15
16 /*
17 Constructor
18 */
19 BedWindow::BedWindow(string bedAFile, string bedBFile, int leftSlop, int rightSlop,
20 bool anyHit, bool noHit, bool writeCount, bool strandWindows,
21 bool matchOnSameStrand, bool matchOnDiffStrand, bool bamInput, bool bamOutput, bool isUncompressedBam) {
22
23 _bedAFile = bedAFile;
24 _bedBFile = bedBFile;
25
26 _leftSlop = leftSlop;
27 _rightSlop = rightSlop;
28
29 _anyHit = anyHit;
30 _noHit = noHit;
31 _writeCount = writeCount;
32 _strandWindows = strandWindows;
33 _matchOnSameStrand = matchOnSameStrand;
34 _matchOnDiffStrand = matchOnDiffStrand;
35 _bamInput = bamInput;
36 _bamOutput = bamOutput;
37 _isUncompressedBam = isUncompressedBam;
38
39 _bedA = new BedFile(bedAFile);
40 _bedB = new BedFile(bedBFile);
41
42 if (_bamInput == false)
43 WindowIntersectBed();
44 else
45 WindowIntersectBam(_bedAFile);
46 }
47
48
49
50 /*
51 Destructor
52 */
53 BedWindow::~BedWindow(void) {
54 }
55
56
57
58 void BedWindow::FindWindowOverlaps(const BED &a, vector<BED> &hits) {
59
60 /*
61 Adjust the start and end of a based on the requested window
62 */
63
64 // update the current feature's start and end
65 // according to the slop requested (slop = 0 by default)
66 CHRPOS aFudgeStart = 0;
67 CHRPOS aFudgeEnd;
68 AddWindow(a, aFudgeStart, aFudgeEnd);
69
70 /*
71 Now report the hits (if any) based on the window around a.
72 */
73 // get the hits in B for the A feature
74 _bedB->FindOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, hits, _matchOnSameStrand, _matchOnDiffStrand);
75
76 int numOverlaps = 0;
77
78 // loop through the hits and report those that meet the user's criteria
79 vector<BED>::const_iterator h = hits.begin();
80 vector<BED>::const_iterator hitsEnd = hits.end();
81 for (; h != hitsEnd; ++h) {
82
83 int s = max(aFudgeStart, h->start);
84 int e = min(aFudgeEnd, h->end);
85 int overlapBases = (e - s); // the number of overlapping bases b/w a and b
86 int aLength = (a.end - a.start); // the length of a in b.p.
87
88 if (s < e) {
89 // is there enough overlap (default ~ 1bp)
90 if ( ((float) overlapBases / (float) aLength) > 0 ) {
91 numOverlaps++;
92 if (_anyHit == false && _noHit == false && _writeCount == false) {
93 _bedA->reportBedTab(a);
94 _bedB->reportBedNewLine(*h);
95 }
96 }
97 }
98 }
99 if (_anyHit == true && (numOverlaps >= 1)) {
100 _bedA->reportBedNewLine(a); }
101 else if (_writeCount == true) {
102 _bedA->reportBedTab(a); printf("\t%d\n", numOverlaps);
103 }
104 else if (_noHit == true && (numOverlaps == 0)) {
105 _bedA->reportBedNewLine(a);
106 }
107 }
108
109
110 bool BedWindow::FindOneOrMoreWindowOverlaps(const BED &a) {
111
112 // update the current feature's start and end
113 // according to the slop requested (slop = 0 by default)
114 CHRPOS aFudgeStart = 0;
115 CHRPOS aFudgeEnd;
116 AddWindow(a, aFudgeStart, aFudgeEnd);
117
118 bool overlapsFound = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _matchOnSameStrand, _matchOnDiffStrand);
119 return overlapsFound;
120 }
121
122
123 void BedWindow::WindowIntersectBed() {
124
125 // load the "B" bed file into a map so
126 // that we can easily compare "A" to it for overlaps
127 _bedB->loadBedFileIntoMap();
128
129 BED a, nullBed;
130 int lineNum = 0; // current input line number
131 BedLineStatus bedStatus;
132 vector<BED> hits; // vector of potential hits
133 hits.reserve(100);
134
135 _bedA->Open();
136 while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {
137 if (bedStatus == BED_VALID) {
138 FindWindowOverlaps(a, hits);
139 hits.clear();
140 a = nullBed;
141 }
142 }
143 _bedA->Close();
144 }
145
146
147 void BedWindow::WindowIntersectBam(string bamFile) {
148
149 // load the "B" bed file into a map so
150 // that we can easily compare "A" to it for overlaps
151 _bedB->loadBedFileIntoMap();
152
153 // open the BAM file
154 BamReader reader;
155 BamWriter writer;
156 reader.Open(bamFile);
157
158 // get header & reference information
159 string bamHeader = reader.GetHeaderText();
160 RefVector refs = reader.GetReferenceData();
161
162 // open a BAM output to stdout if we are writing BAM
163 if (_bamOutput == true) {
164 // set compression mode
165 BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
166 if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
167 writer.SetCompressionMode(compressionMode);
168 // open our BAM writer
169 writer.Open("stdout", bamHeader, refs);
170 }
171
172 vector<BED> hits; // vector of potential hits
173 // reserve some space
174 hits.reserve(100);
175
176 _bedA->bedType = 6;
177 BamAlignment bam;
178 bool overlapsFound;
179 // get each set of alignments for each pair.
180 while (reader.GetNextAlignment(bam)) {
181
182 if (bam.IsMapped()) {
183 BED a;
184 a.chrom = refs.at(bam.RefID).RefName;
185 a.start = bam.Position;
186 a.end = bam.GetEndPosition(false, false);
187
188 // build the name field from the BAM alignment.
189 a.name = bam.Name;
190 if (bam.IsFirstMate()) a.name += "/1";
191 if (bam.IsSecondMate()) a.name += "/2";
192
193 a.score = ToString(bam.MapQuality);
194 a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-";
195
196 if (_bamOutput == true) {
197 overlapsFound = FindOneOrMoreWindowOverlaps(a);
198 if (overlapsFound == true) {
199 if (_noHit == false)
200 writer.SaveAlignment(bam);
201 }
202 else {
203 if (_noHit == true)
204 writer.SaveAlignment(bam);
205 }
206 }
207 else {
208 FindWindowOverlaps(a, hits);
209 hits.clear();
210 }
211 }
212 // BAM IsMapped() is false
213 else if (_noHit == true) {
214 writer.SaveAlignment(bam);
215 }
216 }
217
218 // close the relevant BAM files.
219 reader.Close();
220 if (_bamOutput == true) {
221 writer.Close();
222 }
223 }
224
225
226 void BedWindow::AddWindow(const BED &a, CHRPOS &fudgeStart, CHRPOS &fudgeEnd) {
227 // Does the user want to treat the windows based on strand?
228 // If so,
229 // if "+", then left is left and right is right
230 // if "-", the left is right and right is left.
231 if (_strandWindows) {
232 if (a.strand == "+") {
233 if ((int) (a.start - _leftSlop) > 0)
234 fudgeStart = a.start - _leftSlop;
235 else fudgeStart = 0;
236 fudgeEnd = a.end + _rightSlop;
237 }
238 else {
239 if ((int) (a.start - _rightSlop) > 0)
240 fudgeStart = a.start - _rightSlop;
241 else fudgeStart = 0;
242 fudgeEnd = a.end + _leftSlop;
243 }
244 }
245 // If not, add the windows irrespective of strand
246 else {
247 if ((int) (a.start - _leftSlop) > 0)
248 fudgeStart = a.start - _leftSlop;
249 else fudgeStart = 0;
250 fudgeEnd = a.end + _rightSlop;
251 }
252 }
253