Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/pairToPair/pairToPair.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfcd8b6c1bda |
---|---|
1 /***************************************************************************** | |
2 pairToPair.cpp | |
3 | |
4 (c) 2009 - Aaron Quinlan | |
5 Hall Laboratory | |
6 Department of Biochemistry and Molecular Genetics | |
7 University of Virginia | |
8 aaronquinlan@gmail.com | |
9 | |
10 Licenced under the GNU General Public License 2.0 license. | |
11 ******************************************************************************/ | |
12 #include "lineFileUtilities.h" | |
13 #include "pairToPair.h" | |
14 | |
15 | |
16 /* | |
17 Constructor | |
18 */ | |
19 PairToPair::PairToPair(string &bedAFilePE, string &bedBFilePE, float &overlapFraction, | |
20 string searchType, bool ignoreStrand, bool reqDiffNames, int slop, bool strandedSlop) { | |
21 | |
22 _bedAFilePE = bedAFilePE; | |
23 _bedBFilePE = bedBFilePE; | |
24 _overlapFraction = overlapFraction; | |
25 _searchType = searchType; | |
26 _ignoreStrand = ignoreStrand; | |
27 _reqDiffNames = reqDiffNames; | |
28 _slop = slop; | |
29 _strandedSlop = strandedSlop; | |
30 | |
31 _bedA = new BedFilePE(bedAFilePE); | |
32 _bedB = new BedFilePE(bedBFilePE); | |
33 | |
34 IntersectPairs(); | |
35 } | |
36 | |
37 | |
38 /* | |
39 Destructor | |
40 */ | |
41 PairToPair::~PairToPair(void) { | |
42 } | |
43 | |
44 | |
45 | |
46 void PairToPair::IntersectPairs() { | |
47 | |
48 // load the "B" bed file into a map so | |
49 // that we can easily compare "A" to it for overlaps | |
50 _bedB->loadBedPEFileIntoMap(); | |
51 | |
52 int lineNum = 0; | |
53 BedLineStatus bedStatus; | |
54 BEDPE a, nullBedPE; | |
55 | |
56 _bedA->Open(); | |
57 while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) { | |
58 if (bedStatus == BED_VALID) { | |
59 // identify overlaps b/w the pairs | |
60 FindOverlaps(a); | |
61 a = nullBedPE; | |
62 } | |
63 } | |
64 _bedA->Close(); | |
65 } | |
66 // END IntersectPE | |
67 | |
68 | |
69 | |
70 void PairToPair::FindOverlaps(const BEDPE &a) { | |
71 // | |
72 vector<MATE> hitsA1B1, hitsA1B2, hitsA2B1, hitsA2B2; | |
73 | |
74 // add the appropriate slop to the starts and ends | |
75 int start1 = a.start1; | |
76 int end1 = a.end1; | |
77 int start2 = a.start2; | |
78 int end2 = a.end2; | |
79 | |
80 if (_strandedSlop == true) { | |
81 if (a.strand1 == "+") | |
82 end1 += _slop; | |
83 else | |
84 start1 -= _slop; | |
85 if (a.strand2 == "+") | |
86 end2 += _slop; | |
87 else | |
88 start2 -= _slop; | |
89 } | |
90 else { | |
91 (start1 - _slop) >= 0 ? start1 -= _slop : start1 = 0; | |
92 (start2 - _slop) >= 0 ? start2 -= _slop : start2 = 0; | |
93 end1 += _slop; | |
94 end2 += _slop; | |
95 } | |
96 | |
97 // Find the _potential_ hits between each end of A and B | |
98 _bedB->FindOverlapsPerBin(1, a.chrom1, start1, end1, a.name, a.strand1, hitsA1B1, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A1 & B1 | |
99 _bedB->FindOverlapsPerBin(1, a.chrom2, start2, end2, a.name, a.strand2, hitsA2B1, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A2 & B1 | |
100 _bedB->FindOverlapsPerBin(2, a.chrom1, start1, end1, a.name, a.strand1, hitsA1B2, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A1 & B2 | |
101 _bedB->FindOverlapsPerBin(2, a.chrom2, start2, end2, a.name, a.strand2, hitsA2B2, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A2 & B2 | |
102 | |
103 unsigned int matchCount1 = (hitsA1B1.size() + hitsA2B2.size()); | |
104 unsigned int matchCount2 = (hitsA2B1.size() + hitsA1B2.size()); | |
105 | |
106 | |
107 // report the fact that no hits were found iff _searchType is neither. | |
108 if ((matchCount1 == 0) && (matchCount2 == 0) && (_searchType == "neither")) { | |
109 _bedA->reportBedPENewLine(a); | |
110 } | |
111 else if (_searchType == "both") { | |
112 bool found1 = false; | |
113 bool found2 = false; | |
114 if ((hitsA1B1.size() > 0) || (hitsA2B2.size() > 0)) | |
115 found1 = FindHitsOnBothEnds(a, hitsA1B1, hitsA2B2); | |
116 if ((hitsA2B1.size() > 0) || (hitsA1B2.size() > 0)) | |
117 found2 = FindHitsOnBothEnds(a, hitsA2B1, hitsA1B2); | |
118 } | |
119 else if (_searchType == "notboth") { | |
120 bool found1 = false; | |
121 bool found2 = false; | |
122 if ((hitsA1B1.size() > 0) || (hitsA2B2.size() > 0)) | |
123 found1 = FindHitsOnBothEnds(a, hitsA1B1, hitsA2B2); | |
124 if ((hitsA2B1.size() > 0) || (hitsA1B2.size() > 0)) | |
125 found2 = FindHitsOnBothEnds(a, hitsA2B1, hitsA1B2); | |
126 if (found1 == false && found2 == false) | |
127 _bedA->reportBedPENewLine(a); | |
128 } | |
129 else if (_searchType == "either") { | |
130 FindHitsOnEitherEnd(a, hitsA1B1, hitsA2B2); | |
131 FindHitsOnEitherEnd(a, hitsA2B1, hitsA1B2); | |
132 } | |
133 } | |
134 | |
135 | |
136 bool PairToPair::FindHitsOnBothEnds(const BEDPE &a, const vector<MATE> &qualityHitsEnd1, | |
137 const vector<MATE> &qualityHitsEnd2) { | |
138 | |
139 map<unsigned int, vector<MATE>, less<int> > hitsMap; | |
140 | |
141 for (vector<MATE>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) { | |
142 hitsMap[h->lineNum].push_back(*h); | |
143 } | |
144 for (vector<MATE>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) { | |
145 hitsMap[h->lineNum].push_back(*h); | |
146 } | |
147 | |
148 | |
149 bool bothFound = false; | |
150 for (map<unsigned int, vector<MATE>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) { | |
151 | |
152 // hits on both sides | |
153 if (m->second.size() >= 2) { | |
154 bothFound = true; | |
155 MATE b1 = m->second[0]; | |
156 MATE b2 = m->second[1]; | |
157 | |
158 if (_searchType == "both") { | |
159 _bedA->reportBedPETab(a); | |
160 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end, | |
161 b2.bed.chrom.c_str(), b2.bed.start, b2.bed.end, | |
162 b1.bed.name.c_str(), b1.bed.score.c_str(), | |
163 b1.bed.strand.c_str(), b2.bed.strand.c_str()); | |
164 for (size_t i = 0; i < b1.bed.otherFields.size(); ++i) | |
165 printf("\t%s", b1.bed.otherFields[i].c_str()); | |
166 printf("\n"); | |
167 } | |
168 } | |
169 } | |
170 return bothFound; | |
171 } | |
172 | |
173 | |
174 void PairToPair::FindHitsOnEitherEnd(const BEDPE &a, const vector<MATE> &qualityHitsEnd1, | |
175 const vector<MATE> &qualityHitsEnd2) { | |
176 | |
177 map<unsigned int, vector<MATE>, less<int> > hitsMap; | |
178 | |
179 for (vector<MATE>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) { | |
180 hitsMap[h->lineNum].push_back(*h); | |
181 } | |
182 for (vector<MATE>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) { | |
183 hitsMap[h->lineNum].push_back(*h); | |
184 } | |
185 | |
186 for (map<unsigned int, vector<MATE>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) { | |
187 if (m->second.size() >= 1) { | |
188 | |
189 if ((m->second.size()) == 2) { | |
190 MATE b1 = m->second[0]; | |
191 MATE b2 = m->second[1]; | |
192 | |
193 _bedA->reportBedPETab(a); | |
194 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end, | |
195 b2.bed.chrom.c_str(), b2.bed.start, b2.bed.end, | |
196 b1.bed.name.c_str(), b1.bed.score.c_str(), | |
197 b1.bed.strand.c_str(), b2.bed.strand.c_str()); | |
198 for (size_t i = 0; i < b1.bed.otherFields.size(); ++i) | |
199 printf("\t%s", b1.bed.otherFields[i].c_str()); | |
200 printf("\n"); | |
201 } | |
202 else { | |
203 MATE b1 = m->second[0]; | |
204 | |
205 _bedA->reportBedPETab(a); | |
206 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end, | |
207 b1.mate->bed.chrom.c_str(), b1.mate->bed.start, b1.mate->bed.end, | |
208 b1.bed.name.c_str(), b1.bed.score.c_str(), | |
209 b1.bed.strand.c_str(), b1.mate->bed.strand.c_str()); | |
210 for (size_t i = 0; i < b1.bed.otherFields.size(); ++i) | |
211 printf("\t%s", b1.bed.otherFields[i].c_str()); | |
212 printf("\n"); | |
213 } | |
214 } | |
215 } | |
216 } |