Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/pairToPair/pairToPair.cpp @ 0:dfcd8b6c1bda
Uploaded
| author | aaronquinlan |
|---|---|
| date | Thu, 03 Nov 2011 10:25:04 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfcd8b6c1bda |
|---|---|
| 1 /***************************************************************************** | |
| 2 pairToPair.cpp | |
| 3 | |
| 4 (c) 2009 - Aaron Quinlan | |
| 5 Hall Laboratory | |
| 6 Department of Biochemistry and Molecular Genetics | |
| 7 University of Virginia | |
| 8 aaronquinlan@gmail.com | |
| 9 | |
| 10 Licenced under the GNU General Public License 2.0 license. | |
| 11 ******************************************************************************/ | |
| 12 #include "lineFileUtilities.h" | |
| 13 #include "pairToPair.h" | |
| 14 | |
| 15 | |
| 16 /* | |
| 17 Constructor | |
| 18 */ | |
| 19 PairToPair::PairToPair(string &bedAFilePE, string &bedBFilePE, float &overlapFraction, | |
| 20 string searchType, bool ignoreStrand, bool reqDiffNames, int slop, bool strandedSlop) { | |
| 21 | |
| 22 _bedAFilePE = bedAFilePE; | |
| 23 _bedBFilePE = bedBFilePE; | |
| 24 _overlapFraction = overlapFraction; | |
| 25 _searchType = searchType; | |
| 26 _ignoreStrand = ignoreStrand; | |
| 27 _reqDiffNames = reqDiffNames; | |
| 28 _slop = slop; | |
| 29 _strandedSlop = strandedSlop; | |
| 30 | |
| 31 _bedA = new BedFilePE(bedAFilePE); | |
| 32 _bedB = new BedFilePE(bedBFilePE); | |
| 33 | |
| 34 IntersectPairs(); | |
| 35 } | |
| 36 | |
| 37 | |
| 38 /* | |
| 39 Destructor | |
| 40 */ | |
| 41 PairToPair::~PairToPair(void) { | |
| 42 } | |
| 43 | |
| 44 | |
| 45 | |
| 46 void PairToPair::IntersectPairs() { | |
| 47 | |
| 48 // load the "B" bed file into a map so | |
| 49 // that we can easily compare "A" to it for overlaps | |
| 50 _bedB->loadBedPEFileIntoMap(); | |
| 51 | |
| 52 int lineNum = 0; | |
| 53 BedLineStatus bedStatus; | |
| 54 BEDPE a, nullBedPE; | |
| 55 | |
| 56 _bedA->Open(); | |
| 57 while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) { | |
| 58 if (bedStatus == BED_VALID) { | |
| 59 // identify overlaps b/w the pairs | |
| 60 FindOverlaps(a); | |
| 61 a = nullBedPE; | |
| 62 } | |
| 63 } | |
| 64 _bedA->Close(); | |
| 65 } | |
| 66 // END IntersectPE | |
| 67 | |
| 68 | |
| 69 | |
| 70 void PairToPair::FindOverlaps(const BEDPE &a) { | |
| 71 // | |
| 72 vector<MATE> hitsA1B1, hitsA1B2, hitsA2B1, hitsA2B2; | |
| 73 | |
| 74 // add the appropriate slop to the starts and ends | |
| 75 int start1 = a.start1; | |
| 76 int end1 = a.end1; | |
| 77 int start2 = a.start2; | |
| 78 int end2 = a.end2; | |
| 79 | |
| 80 if (_strandedSlop == true) { | |
| 81 if (a.strand1 == "+") | |
| 82 end1 += _slop; | |
| 83 else | |
| 84 start1 -= _slop; | |
| 85 if (a.strand2 == "+") | |
| 86 end2 += _slop; | |
| 87 else | |
| 88 start2 -= _slop; | |
| 89 } | |
| 90 else { | |
| 91 (start1 - _slop) >= 0 ? start1 -= _slop : start1 = 0; | |
| 92 (start2 - _slop) >= 0 ? start2 -= _slop : start2 = 0; | |
| 93 end1 += _slop; | |
| 94 end2 += _slop; | |
| 95 } | |
| 96 | |
| 97 // Find the _potential_ hits between each end of A and B | |
| 98 _bedB->FindOverlapsPerBin(1, a.chrom1, start1, end1, a.name, a.strand1, hitsA1B1, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A1 & B1 | |
| 99 _bedB->FindOverlapsPerBin(1, a.chrom2, start2, end2, a.name, a.strand2, hitsA2B1, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A2 & B1 | |
| 100 _bedB->FindOverlapsPerBin(2, a.chrom1, start1, end1, a.name, a.strand1, hitsA1B2, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A1 & B2 | |
| 101 _bedB->FindOverlapsPerBin(2, a.chrom2, start2, end2, a.name, a.strand2, hitsA2B2, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A2 & B2 | |
| 102 | |
| 103 unsigned int matchCount1 = (hitsA1B1.size() + hitsA2B2.size()); | |
| 104 unsigned int matchCount2 = (hitsA2B1.size() + hitsA1B2.size()); | |
| 105 | |
| 106 | |
| 107 // report the fact that no hits were found iff _searchType is neither. | |
| 108 if ((matchCount1 == 0) && (matchCount2 == 0) && (_searchType == "neither")) { | |
| 109 _bedA->reportBedPENewLine(a); | |
| 110 } | |
| 111 else if (_searchType == "both") { | |
| 112 bool found1 = false; | |
| 113 bool found2 = false; | |
| 114 if ((hitsA1B1.size() > 0) || (hitsA2B2.size() > 0)) | |
| 115 found1 = FindHitsOnBothEnds(a, hitsA1B1, hitsA2B2); | |
| 116 if ((hitsA2B1.size() > 0) || (hitsA1B2.size() > 0)) | |
| 117 found2 = FindHitsOnBothEnds(a, hitsA2B1, hitsA1B2); | |
| 118 } | |
| 119 else if (_searchType == "notboth") { | |
| 120 bool found1 = false; | |
| 121 bool found2 = false; | |
| 122 if ((hitsA1B1.size() > 0) || (hitsA2B2.size() > 0)) | |
| 123 found1 = FindHitsOnBothEnds(a, hitsA1B1, hitsA2B2); | |
| 124 if ((hitsA2B1.size() > 0) || (hitsA1B2.size() > 0)) | |
| 125 found2 = FindHitsOnBothEnds(a, hitsA2B1, hitsA1B2); | |
| 126 if (found1 == false && found2 == false) | |
| 127 _bedA->reportBedPENewLine(a); | |
| 128 } | |
| 129 else if (_searchType == "either") { | |
| 130 FindHitsOnEitherEnd(a, hitsA1B1, hitsA2B2); | |
| 131 FindHitsOnEitherEnd(a, hitsA2B1, hitsA1B2); | |
| 132 } | |
| 133 } | |
| 134 | |
| 135 | |
| 136 bool PairToPair::FindHitsOnBothEnds(const BEDPE &a, const vector<MATE> &qualityHitsEnd1, | |
| 137 const vector<MATE> &qualityHitsEnd2) { | |
| 138 | |
| 139 map<unsigned int, vector<MATE>, less<int> > hitsMap; | |
| 140 | |
| 141 for (vector<MATE>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) { | |
| 142 hitsMap[h->lineNum].push_back(*h); | |
| 143 } | |
| 144 for (vector<MATE>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) { | |
| 145 hitsMap[h->lineNum].push_back(*h); | |
| 146 } | |
| 147 | |
| 148 | |
| 149 bool bothFound = false; | |
| 150 for (map<unsigned int, vector<MATE>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) { | |
| 151 | |
| 152 // hits on both sides | |
| 153 if (m->second.size() >= 2) { | |
| 154 bothFound = true; | |
| 155 MATE b1 = m->second[0]; | |
| 156 MATE b2 = m->second[1]; | |
| 157 | |
| 158 if (_searchType == "both") { | |
| 159 _bedA->reportBedPETab(a); | |
| 160 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end, | |
| 161 b2.bed.chrom.c_str(), b2.bed.start, b2.bed.end, | |
| 162 b1.bed.name.c_str(), b1.bed.score.c_str(), | |
| 163 b1.bed.strand.c_str(), b2.bed.strand.c_str()); | |
| 164 for (size_t i = 0; i < b1.bed.otherFields.size(); ++i) | |
| 165 printf("\t%s", b1.bed.otherFields[i].c_str()); | |
| 166 printf("\n"); | |
| 167 } | |
| 168 } | |
| 169 } | |
| 170 return bothFound; | |
| 171 } | |
| 172 | |
| 173 | |
| 174 void PairToPair::FindHitsOnEitherEnd(const BEDPE &a, const vector<MATE> &qualityHitsEnd1, | |
| 175 const vector<MATE> &qualityHitsEnd2) { | |
| 176 | |
| 177 map<unsigned int, vector<MATE>, less<int> > hitsMap; | |
| 178 | |
| 179 for (vector<MATE>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) { | |
| 180 hitsMap[h->lineNum].push_back(*h); | |
| 181 } | |
| 182 for (vector<MATE>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) { | |
| 183 hitsMap[h->lineNum].push_back(*h); | |
| 184 } | |
| 185 | |
| 186 for (map<unsigned int, vector<MATE>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) { | |
| 187 if (m->second.size() >= 1) { | |
| 188 | |
| 189 if ((m->second.size()) == 2) { | |
| 190 MATE b1 = m->second[0]; | |
| 191 MATE b2 = m->second[1]; | |
| 192 | |
| 193 _bedA->reportBedPETab(a); | |
| 194 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end, | |
| 195 b2.bed.chrom.c_str(), b2.bed.start, b2.bed.end, | |
| 196 b1.bed.name.c_str(), b1.bed.score.c_str(), | |
| 197 b1.bed.strand.c_str(), b2.bed.strand.c_str()); | |
| 198 for (size_t i = 0; i < b1.bed.otherFields.size(); ++i) | |
| 199 printf("\t%s", b1.bed.otherFields[i].c_str()); | |
| 200 printf("\n"); | |
| 201 } | |
| 202 else { | |
| 203 MATE b1 = m->second[0]; | |
| 204 | |
| 205 _bedA->reportBedPETab(a); | |
| 206 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end, | |
| 207 b1.mate->bed.chrom.c_str(), b1.mate->bed.start, b1.mate->bed.end, | |
| 208 b1.bed.name.c_str(), b1.bed.score.c_str(), | |
| 209 b1.bed.strand.c_str(), b1.mate->bed.strand.c_str()); | |
| 210 for (size_t i = 0; i < b1.bed.otherFields.size(); ++i) | |
| 211 printf("\t%s", b1.bed.otherFields[i].c_str()); | |
| 212 printf("\n"); | |
| 213 } | |
| 214 } | |
| 215 } | |
| 216 } |
