Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/windowBed/windowBed.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfcd8b6c1bda |
---|---|
1 /***************************************************************************** | |
2 windowBed.cpp | |
3 | |
4 (c) 2009 - Aaron Quinlan | |
5 Hall Laboratory | |
6 Department of Biochemistry and Molecular Genetics | |
7 University of Virginia | |
8 aaronquinlan@gmail.com | |
9 | |
10 Licenced under the GNU General Public License 2.0 license. | |
11 ******************************************************************************/ | |
12 #include "lineFileUtilities.h" | |
13 #include "windowBed.h" | |
14 | |
15 | |
16 /* | |
17 Constructor | |
18 */ | |
19 BedWindow::BedWindow(string bedAFile, string bedBFile, int leftSlop, int rightSlop, | |
20 bool anyHit, bool noHit, bool writeCount, bool strandWindows, | |
21 bool matchOnSameStrand, bool matchOnDiffStrand, bool bamInput, bool bamOutput, bool isUncompressedBam) { | |
22 | |
23 _bedAFile = bedAFile; | |
24 _bedBFile = bedBFile; | |
25 | |
26 _leftSlop = leftSlop; | |
27 _rightSlop = rightSlop; | |
28 | |
29 _anyHit = anyHit; | |
30 _noHit = noHit; | |
31 _writeCount = writeCount; | |
32 _strandWindows = strandWindows; | |
33 _matchOnSameStrand = matchOnSameStrand; | |
34 _matchOnDiffStrand = matchOnDiffStrand; | |
35 _bamInput = bamInput; | |
36 _bamOutput = bamOutput; | |
37 _isUncompressedBam = isUncompressedBam; | |
38 | |
39 _bedA = new BedFile(bedAFile); | |
40 _bedB = new BedFile(bedBFile); | |
41 | |
42 if (_bamInput == false) | |
43 WindowIntersectBed(); | |
44 else | |
45 WindowIntersectBam(_bedAFile); | |
46 } | |
47 | |
48 | |
49 | |
50 /* | |
51 Destructor | |
52 */ | |
53 BedWindow::~BedWindow(void) { | |
54 } | |
55 | |
56 | |
57 | |
58 void BedWindow::FindWindowOverlaps(const BED &a, vector<BED> &hits) { | |
59 | |
60 /* | |
61 Adjust the start and end of a based on the requested window | |
62 */ | |
63 | |
64 // update the current feature's start and end | |
65 // according to the slop requested (slop = 0 by default) | |
66 CHRPOS aFudgeStart = 0; | |
67 CHRPOS aFudgeEnd; | |
68 AddWindow(a, aFudgeStart, aFudgeEnd); | |
69 | |
70 /* | |
71 Now report the hits (if any) based on the window around a. | |
72 */ | |
73 // get the hits in B for the A feature | |
74 _bedB->FindOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, hits, _matchOnSameStrand, _matchOnDiffStrand); | |
75 | |
76 int numOverlaps = 0; | |
77 | |
78 // loop through the hits and report those that meet the user's criteria | |
79 vector<BED>::const_iterator h = hits.begin(); | |
80 vector<BED>::const_iterator hitsEnd = hits.end(); | |
81 for (; h != hitsEnd; ++h) { | |
82 | |
83 int s = max(aFudgeStart, h->start); | |
84 int e = min(aFudgeEnd, h->end); | |
85 int overlapBases = (e - s); // the number of overlapping bases b/w a and b | |
86 int aLength = (a.end - a.start); // the length of a in b.p. | |
87 | |
88 if (s < e) { | |
89 // is there enough overlap (default ~ 1bp) | |
90 if ( ((float) overlapBases / (float) aLength) > 0 ) { | |
91 numOverlaps++; | |
92 if (_anyHit == false && _noHit == false && _writeCount == false) { | |
93 _bedA->reportBedTab(a); | |
94 _bedB->reportBedNewLine(*h); | |
95 } | |
96 } | |
97 } | |
98 } | |
99 if (_anyHit == true && (numOverlaps >= 1)) { | |
100 _bedA->reportBedNewLine(a); } | |
101 else if (_writeCount == true) { | |
102 _bedA->reportBedTab(a); printf("\t%d\n", numOverlaps); | |
103 } | |
104 else if (_noHit == true && (numOverlaps == 0)) { | |
105 _bedA->reportBedNewLine(a); | |
106 } | |
107 } | |
108 | |
109 | |
110 bool BedWindow::FindOneOrMoreWindowOverlaps(const BED &a) { | |
111 | |
112 // update the current feature's start and end | |
113 // according to the slop requested (slop = 0 by default) | |
114 CHRPOS aFudgeStart = 0; | |
115 CHRPOS aFudgeEnd; | |
116 AddWindow(a, aFudgeStart, aFudgeEnd); | |
117 | |
118 bool overlapsFound = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _matchOnSameStrand, _matchOnDiffStrand); | |
119 return overlapsFound; | |
120 } | |
121 | |
122 | |
123 void BedWindow::WindowIntersectBed() { | |
124 | |
125 // load the "B" bed file into a map so | |
126 // that we can easily compare "A" to it for overlaps | |
127 _bedB->loadBedFileIntoMap(); | |
128 | |
129 BED a, nullBed; | |
130 int lineNum = 0; // current input line number | |
131 BedLineStatus bedStatus; | |
132 vector<BED> hits; // vector of potential hits | |
133 hits.reserve(100); | |
134 | |
135 _bedA->Open(); | |
136 while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) { | |
137 if (bedStatus == BED_VALID) { | |
138 FindWindowOverlaps(a, hits); | |
139 hits.clear(); | |
140 a = nullBed; | |
141 } | |
142 } | |
143 _bedA->Close(); | |
144 } | |
145 | |
146 | |
147 void BedWindow::WindowIntersectBam(string bamFile) { | |
148 | |
149 // load the "B" bed file into a map so | |
150 // that we can easily compare "A" to it for overlaps | |
151 _bedB->loadBedFileIntoMap(); | |
152 | |
153 // open the BAM file | |
154 BamReader reader; | |
155 BamWriter writer; | |
156 reader.Open(bamFile); | |
157 | |
158 // get header & reference information | |
159 string bamHeader = reader.GetHeaderText(); | |
160 RefVector refs = reader.GetReferenceData(); | |
161 | |
162 // open a BAM output to stdout if we are writing BAM | |
163 if (_bamOutput == true) { | |
164 // set compression mode | |
165 BamWriter::CompressionMode compressionMode = BamWriter::Compressed; | |
166 if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed; | |
167 writer.SetCompressionMode(compressionMode); | |
168 // open our BAM writer | |
169 writer.Open("stdout", bamHeader, refs); | |
170 } | |
171 | |
172 vector<BED> hits; // vector of potential hits | |
173 // reserve some space | |
174 hits.reserve(100); | |
175 | |
176 _bedA->bedType = 6; | |
177 BamAlignment bam; | |
178 bool overlapsFound; | |
179 // get each set of alignments for each pair. | |
180 while (reader.GetNextAlignment(bam)) { | |
181 | |
182 if (bam.IsMapped()) { | |
183 BED a; | |
184 a.chrom = refs.at(bam.RefID).RefName; | |
185 a.start = bam.Position; | |
186 a.end = bam.GetEndPosition(false, false); | |
187 | |
188 // build the name field from the BAM alignment. | |
189 a.name = bam.Name; | |
190 if (bam.IsFirstMate()) a.name += "/1"; | |
191 if (bam.IsSecondMate()) a.name += "/2"; | |
192 | |
193 a.score = ToString(bam.MapQuality); | |
194 a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-"; | |
195 | |
196 if (_bamOutput == true) { | |
197 overlapsFound = FindOneOrMoreWindowOverlaps(a); | |
198 if (overlapsFound == true) { | |
199 if (_noHit == false) | |
200 writer.SaveAlignment(bam); | |
201 } | |
202 else { | |
203 if (_noHit == true) | |
204 writer.SaveAlignment(bam); | |
205 } | |
206 } | |
207 else { | |
208 FindWindowOverlaps(a, hits); | |
209 hits.clear(); | |
210 } | |
211 } | |
212 // BAM IsMapped() is false | |
213 else if (_noHit == true) { | |
214 writer.SaveAlignment(bam); | |
215 } | |
216 } | |
217 | |
218 // close the relevant BAM files. | |
219 reader.Close(); | |
220 if (_bamOutput == true) { | |
221 writer.Close(); | |
222 } | |
223 } | |
224 | |
225 | |
226 void BedWindow::AddWindow(const BED &a, CHRPOS &fudgeStart, CHRPOS &fudgeEnd) { | |
227 // Does the user want to treat the windows based on strand? | |
228 // If so, | |
229 // if "+", then left is left and right is right | |
230 // if "-", the left is right and right is left. | |
231 if (_strandWindows) { | |
232 if (a.strand == "+") { | |
233 if ((int) (a.start - _leftSlop) > 0) | |
234 fudgeStart = a.start - _leftSlop; | |
235 else fudgeStart = 0; | |
236 fudgeEnd = a.end + _rightSlop; | |
237 } | |
238 else { | |
239 if ((int) (a.start - _rightSlop) > 0) | |
240 fudgeStart = a.start - _rightSlop; | |
241 else fudgeStart = 0; | |
242 fudgeEnd = a.end + _leftSlop; | |
243 } | |
244 } | |
245 // If not, add the windows irrespective of strand | |
246 else { | |
247 if ((int) (a.start - _leftSlop) > 0) | |
248 fudgeStart = a.start - _leftSlop; | |
249 else fudgeStart = 0; | |
250 fudgeEnd = a.end + _rightSlop; | |
251 } | |
252 } | |
253 |