Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/pairToPair/pairToPair.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/pairToPair/pairToPair.cpp Thu Nov 03 10:25:04 2011 -0400 @@ -0,0 +1,216 @@ +/***************************************************************************** + pairToPair.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "pairToPair.h" + + +/* + Constructor +*/ +PairToPair::PairToPair(string &bedAFilePE, string &bedBFilePE, float &overlapFraction, + string searchType, bool ignoreStrand, bool reqDiffNames, int slop, bool strandedSlop) { + + _bedAFilePE = bedAFilePE; + _bedBFilePE = bedBFilePE; + _overlapFraction = overlapFraction; + _searchType = searchType; + _ignoreStrand = ignoreStrand; + _reqDiffNames = reqDiffNames; + _slop = slop; + _strandedSlop = strandedSlop; + + _bedA = new BedFilePE(bedAFilePE); + _bedB = new BedFilePE(bedBFilePE); + + IntersectPairs(); +} + + +/* + Destructor +*/ +PairToPair::~PairToPair(void) { +} + + + +void PairToPair::IntersectPairs() { + + // load the "B" bed file into a map so + // that we can easily compare "A" to it for overlaps + _bedB->loadBedPEFileIntoMap(); + + int lineNum = 0; + BedLineStatus bedStatus; + BEDPE a, nullBedPE; + + _bedA->Open(); + while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) { + if (bedStatus == BED_VALID) { + // identify overlaps b/w the pairs + FindOverlaps(a); + a = nullBedPE; + } + } + _bedA->Close(); +} +// END IntersectPE + + + +void PairToPair::FindOverlaps(const BEDPE &a) { + // + vector<MATE> hitsA1B1, hitsA1B2, hitsA2B1, hitsA2B2; + + // add the appropriate slop to the starts and ends + int start1 = a.start1; + int end1 = a.end1; + int start2 = a.start2; + int end2 = a.end2; + + if (_strandedSlop == true) { + if (a.strand1 == "+") + end1 += _slop; + else + start1 -= _slop; + if (a.strand2 == "+") + end2 += _slop; + else + start2 -= _slop; + } + else { + (start1 - _slop) >= 0 ? start1 -= _slop : start1 = 0; + (start2 - _slop) >= 0 ? start2 -= _slop : start2 = 0; + end1 += _slop; + end2 += _slop; + } + + // Find the _potential_ hits between each end of A and B + _bedB->FindOverlapsPerBin(1, a.chrom1, start1, end1, a.name, a.strand1, hitsA1B1, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A1 & B1 + _bedB->FindOverlapsPerBin(1, a.chrom2, start2, end2, a.name, a.strand2, hitsA2B1, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A2 & B1 + _bedB->FindOverlapsPerBin(2, a.chrom1, start1, end1, a.name, a.strand1, hitsA1B2, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A1 & B2 + _bedB->FindOverlapsPerBin(2, a.chrom2, start2, end2, a.name, a.strand2, hitsA2B2, _overlapFraction, !(_ignoreStrand), _reqDiffNames); // hits b/w A2 & B2 + + unsigned int matchCount1 = (hitsA1B1.size() + hitsA2B2.size()); + unsigned int matchCount2 = (hitsA2B1.size() + hitsA1B2.size()); + + + // report the fact that no hits were found iff _searchType is neither. + if ((matchCount1 == 0) && (matchCount2 == 0) && (_searchType == "neither")) { + _bedA->reportBedPENewLine(a); + } + else if (_searchType == "both") { + bool found1 = false; + bool found2 = false; + if ((hitsA1B1.size() > 0) || (hitsA2B2.size() > 0)) + found1 = FindHitsOnBothEnds(a, hitsA1B1, hitsA2B2); + if ((hitsA2B1.size() > 0) || (hitsA1B2.size() > 0)) + found2 = FindHitsOnBothEnds(a, hitsA2B1, hitsA1B2); + } + else if (_searchType == "notboth") { + bool found1 = false; + bool found2 = false; + if ((hitsA1B1.size() > 0) || (hitsA2B2.size() > 0)) + found1 = FindHitsOnBothEnds(a, hitsA1B1, hitsA2B2); + if ((hitsA2B1.size() > 0) || (hitsA1B2.size() > 0)) + found2 = FindHitsOnBothEnds(a, hitsA2B1, hitsA1B2); + if (found1 == false && found2 == false) + _bedA->reportBedPENewLine(a); + } + else if (_searchType == "either") { + FindHitsOnEitherEnd(a, hitsA1B1, hitsA2B2); + FindHitsOnEitherEnd(a, hitsA2B1, hitsA1B2); + } +} + + +bool PairToPair::FindHitsOnBothEnds(const BEDPE &a, const vector<MATE> &qualityHitsEnd1, + const vector<MATE> &qualityHitsEnd2) { + + map<unsigned int, vector<MATE>, less<int> > hitsMap; + + for (vector<MATE>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) { + hitsMap[h->lineNum].push_back(*h); + } + for (vector<MATE>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) { + hitsMap[h->lineNum].push_back(*h); + } + + + bool bothFound = false; + for (map<unsigned int, vector<MATE>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) { + + // hits on both sides + if (m->second.size() >= 2) { + bothFound = true; + MATE b1 = m->second[0]; + MATE b2 = m->second[1]; + + if (_searchType == "both") { + _bedA->reportBedPETab(a); + printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end, + b2.bed.chrom.c_str(), b2.bed.start, b2.bed.end, + b1.bed.name.c_str(), b1.bed.score.c_str(), + b1.bed.strand.c_str(), b2.bed.strand.c_str()); + for (size_t i = 0; i < b1.bed.otherFields.size(); ++i) + printf("\t%s", b1.bed.otherFields[i].c_str()); + printf("\n"); + } + } + } + return bothFound; +} + + +void PairToPair::FindHitsOnEitherEnd(const BEDPE &a, const vector<MATE> &qualityHitsEnd1, + const vector<MATE> &qualityHitsEnd2) { + + map<unsigned int, vector<MATE>, less<int> > hitsMap; + + for (vector<MATE>::const_iterator h = qualityHitsEnd1.begin(); h != qualityHitsEnd1.end(); ++h) { + hitsMap[h->lineNum].push_back(*h); + } + for (vector<MATE>::const_iterator h = qualityHitsEnd2.begin(); h != qualityHitsEnd2.end(); ++h) { + hitsMap[h->lineNum].push_back(*h); + } + + for (map<unsigned int, vector<MATE>, less<unsigned int> >::iterator m = hitsMap.begin(); m != hitsMap.end(); ++m) { + if (m->second.size() >= 1) { + + if ((m->second.size()) == 2) { + MATE b1 = m->second[0]; + MATE b2 = m->second[1]; + + _bedA->reportBedPETab(a); + printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end, + b2.bed.chrom.c_str(), b2.bed.start, b2.bed.end, + b1.bed.name.c_str(), b1.bed.score.c_str(), + b1.bed.strand.c_str(), b2.bed.strand.c_str()); + for (size_t i = 0; i < b1.bed.otherFields.size(); ++i) + printf("\t%s", b1.bed.otherFields[i].c_str()); + printf("\n"); + } + else { + MATE b1 = m->second[0]; + + _bedA->reportBedPETab(a); + printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", b1.bed.chrom.c_str(), b1.bed.start, b1.bed.end, + b1.mate->bed.chrom.c_str(), b1.mate->bed.start, b1.mate->bed.end, + b1.bed.name.c_str(), b1.bed.score.c_str(), + b1.bed.strand.c_str(), b1.mate->bed.strand.c_str()); + for (size_t i = 0; i < b1.bed.otherFields.size(); ++i) + printf("\t%s", b1.bed.otherFields[i].c_str()); + printf("\n"); + } + } + } +}