comparison BEDTools-Version-2.14.3/src/pairToPair/pairToPair.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfcd8b6c1bda
1 /*****************************************************************************
2 pairToPair.cpp
3
4 (c) 2009 - Aaron Quinlan
5 Hall Laboratory
6 Department of Biochemistry and Molecular Genetics
7 University of Virginia
8 aaronquinlan@gmail.com
9
10 Licenced under the GNU General Public License 2.0 license.
11 ******************************************************************************/
12 #include "lineFileUtilities.h"
13 #include "pairToPair.h"
14
15
16 /*
17 Constructor
18 */
19 PairToPair::PairToPair(string &bedAFilePE, string &bedBFilePE, float &overlapFraction,
20 string searchType, bool ignoreStrand, bool reqDiffNames, int slop, bool strandedSlop) {
21
22 _bedAFilePE = bedAFilePE;
23 _bedBFilePE = bedBFilePE;
24 _overlapFraction = overlapFraction;
25 _searchType = searchType;
26 _ignoreStrand = ignoreStrand;
27 _reqDiffNames = reqDiffNames;
28 _slop = slop;
29 _strandedSlop = strandedSlop;
30
31 _bedA = new BedFilePE(bedAFilePE);
32 _bedB = new BedFilePE(bedBFilePE);
33
34 IntersectPairs();
35 }
36
37
38 /*
39 Destructor
40 */
41 PairToPair::~PairToPair(void) {
42 }
43
44
45
46 void PairToPair::IntersectPairs() {
47
48 // load the "B" bed file into a map so
49 // that we can easily compare "A" to it for overlaps
50 _bedB->loadBedPEFileIntoMap();
51
52 int lineNum = 0;
53 BedLineStatus bedStatus;
54 BEDPE a, nullBedPE;
55
56 _bedA->Open();
57 while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) {
58 if (bedStatus == BED_VALID) {
59 // identify overlaps b/w the pairs
60 FindOverlaps(a);
61 a = nullBedPE;
62 }
63 }
64 _bedA->Close();
65 }
66 // END IntersectPE
67
68
69
70 void PairToPair::FindOverlaps(const BEDPE &a) {
71 //
72 vector<MATE> hitsA1B1, hitsA1B2, hitsA2B1, hitsA2B2;
73
74 // add the appropriate slop to the starts and ends
75 int start1 = a.start1;
76 int end1 = a.end1;
77 int start2 = a.start2;
78 int end2 = a.end2;
79
80 if (_strandedSlop == true) {
81 if (a.strand1 == "+")
82 end1 += _slop;
83 else
84 start1 -= _slop;
85 if (a.strand2 == "+")
86 end2 += _slop;
87 else
88 start2 -= _slop;
89 }
90 else {
91 (start1 - _slop) >= 0 ? start1 -= _slop : start1 = 0;
92 (start2 - _slop) >= 0 ? start2 -= _slop : start2 = 0;
93 end1 += _slop;
94 end2 += _slop;
95 }
96
97 // Find the _potential_ hits between each end of A and B
98 _bedB->FindOverlapsPerBin(1, a.chrom1, start1, end1, a.name, a.strand1, hitsA1B1, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A1 & B1
99 _bedB->FindOverlapsPerBin(1, a.chrom2, start2, end2, a.name, a.strand2, hitsA2B1, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A2 & B1
100 _bedB->FindOverlapsPerBin(2, a.chrom1, start1, end1, a.name, a.strand1, hitsA1B2, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A1 & B2
101 _bedB->FindOverlapsPerBin(2, a.chrom2, start2, end2, a.name, a.strand2, hitsA2B2, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A2 & B2
102
103 unsigned int matchCount1 = (hitsA1B1.size() + hitsA2B2.size());
104 unsigned int matchCount2 = (hitsA2B1.size() + hitsA1B2.size());
105
106
107 // report the fact that no hits were found iff _searchType is neither.
108 if ((matchCount1 == 0) && (matchCount2 == 0) && (_searchType == "neither")) {
109 _bedA->reportBedPENewLine(a);
110 }
111 else if (_searchType == "both") {
112 bool found1 = false;
113 bool found2 = false;
114 if ((hitsA1B1.size() > 0) || (hitsA2B2.size() > 0))
115 found1 = FindHitsOnBothEnds(a, hitsA1B1, hitsA2B2);
116 if ((hitsA2B1.size() > 0) || (hitsA1B2.size() > 0))
117 found2 = FindHitsOnBothEnds(a, hitsA2B1, hitsA1B2);
118 }
119 else if (_searchType == "notboth") {
120 bool found1 = false;
121 bool found2 = false;
122 if ((hitsA1B1.size() > 0) || (hitsA2B2.size() > 0))
123 found1 = FindHitsOnBothEnds(a, hitsA1B1, hitsA2B2);
124 if ((hitsA2B1.size() > 0) || (hitsA1B2.size() > 0))
125 found2 = FindHitsOnBothEnds(a, hitsA2B1, hitsA1B2);
126 if (found1 == false && found2 == false)
127 _bedA->reportBedPENewLine(a);
128 }
129 else if (_searchType == "either") {
130 FindHitsOnEitherEnd(a, hitsA1B1, hitsA2B2);
131 FindHitsOnEitherEnd(a, hitsA2B1, hitsA1B2);
132 }
133 }
134
135
136 bool PairToPair::FindHitsOnBothEnds(const BEDPE &a, const vector<MATE> &qualityHitsEnd1,
137 const vector<MATE> &qualityHitsEnd2) {
138
139 map<unsigned int, vector<MATE>, less<int> > hitsMap;
140
141 for (vector<MATE>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) {
142 hitsMap[h->lineNum].push_back(*h);
143 }
144 for (vector<MATE>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) {
145 hitsMap[h->lineNum].push_back(*h);
146 }
147
148
149 bool bothFound = false;
150 for (map<unsigned int, vector<MATE>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) {
151
152 // hits on both sides
153 if (m->second.size() >= 2) {
154 bothFound = true;
155 MATE b1 = m->second[0];
156 MATE b2 = m->second[1];
157
158 if (_searchType == "both") {
159 _bedA->reportBedPETab(a);
160 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end,
161 b2.bed.chrom.c_str(), b2.bed.start, b2.bed.end,
162 b1.bed.name.c_str(), b1.bed.score.c_str(),
163 b1.bed.strand.c_str(), b2.bed.strand.c_str());
164 for (size_t i = 0; i < b1.bed.otherFields.size(); ++i)
165 printf("\t%s", b1.bed.otherFields[i].c_str());
166 printf("\n");
167 }
168 }
169 }
170 return bothFound;
171 }
172
173
174 void PairToPair::FindHitsOnEitherEnd(const BEDPE &a, const vector<MATE> &qualityHitsEnd1,
175 const vector<MATE> &qualityHitsEnd2) {
176
177 map<unsigned int, vector<MATE>, less<int> > hitsMap;
178
179 for (vector<MATE>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) {
180 hitsMap[h->lineNum].push_back(*h);
181 }
182 for (vector<MATE>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) {
183 hitsMap[h->lineNum].push_back(*h);
184 }
185
186 for (map<unsigned int, vector<MATE>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) {
187 if (m->second.size() >= 1) {
188
189 if ((m->second.size()) == 2) {
190 MATE b1 = m->second[0];
191 MATE b2 = m->second[1];
192
193 _bedA->reportBedPETab(a);
194 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end,
195 b2.bed.chrom.c_str(), b2.bed.start, b2.bed.end,
196 b1.bed.name.c_str(), b1.bed.score.c_str(),
197 b1.bed.strand.c_str(), b2.bed.strand.c_str());
198 for (size_t i = 0; i < b1.bed.otherFields.size(); ++i)
199 printf("\t%s", b1.bed.otherFields[i].c_str());
200 printf("\n");
201 }
202 else {
203 MATE b1 = m->second[0];
204
205 _bedA->reportBedPETab(a);
206 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end,
207 b1.mate->bed.chrom.c_str(), b1.mate->bed.start, b1.mate->bed.end,
208 b1.bed.name.c_str(), b1.bed.score.c_str(),
209 b1.bed.strand.c_str(), b1.mate->bed.strand.c_str());
210 for (size_t i = 0; i < b1.bed.otherFields.size(); ++i)
211 printf("\t%s", b1.bed.otherFields[i].c_str());
212 printf("\n");
213 }
214 }
215 }
216 }