Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/closestBed/closestBed.cpp @ 1:bec36315bd12 default tip
Deleted selected files
author | aaronquinlan |
---|---|
date | Sat, 19 Nov 2011 14:17:03 -0500 |
parents | dfcd8b6c1bda |
children |
line wrap: on
line diff
--- a/BEDTools-Version-2.14.3/src/closestBed/closestBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,234 +0,0 @@ -/***************************************************************************** - closestBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "closestBed.h" - -const int MAXSLOP = 256000000; // 2*MAXSLOP = 512 megabases. - // We don't want to keep looking if we - // can't find a nearby feature within 512 Mb. -const int SLOPGROWTH = 2048000; - - -/* - Constructor -*/ -BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand, - string &tieMode, bool reportDistance, bool signDistance, string &_strandedDistMode, - bool ignoreOverlaps) - : _bedAFile(bedAFile) - , _bedBFile(bedBFile) - , _tieMode(tieMode) - , _sameStrand(sameStrand) - , _diffStrand(diffStrand) - , _reportDistance(reportDistance) - , _signDistance(signDistance) - , _strandedDistMode(_strandedDistMode) - , _ignoreOverlaps(ignoreOverlaps) -{ - _bedA = new BedFile(_bedAFile); - _bedB = new BedFile(_bedBFile); - FindClosestBed(); -} - - -/* - Destructor -*/ -BedClosest::~BedClosest(void) { -} - - -void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) { - - int slop = 0; // start out just looking for overlaps - // within the current bin (~128Kb) - - // update the current feature's start and end - - CHRPOS aFudgeStart = 0; - CHRPOS aFudgeEnd; - int numOverlaps = 0; - vector<BED> closestB; - CHRPOS minDistance = INT_MAX; - int32_t curDistance = INT_MAX; - vector<int32_t> distances; - - // is there at least one feature in B on the same chrom - // as the current A feature? - if(_bedB->bedMap.find(a.chrom) != _bedB->bedMap.end()) { - - while ((numOverlaps == 0) && (slop <= MAXSLOP)) { - - // add some slop (starting at 0 bases) to a in hopes - // of finding a hit in B - if ((static_cast<int>(a.start) - slop) > 0) - aFudgeStart = a.start - slop; - else - aFudgeStart = 0; - - if ((static_cast<int>(a.start) + slop) < (2 * MAXSLOP)) - aFudgeEnd = a.end + slop; - else - aFudgeEnd = 2 * MAXSLOP; - - // THE HEAVY LIFTING - // search for hits with the current slop added - _bedB->FindOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, hits, _sameStrand, _diffStrand); - - vector<BED>::const_iterator h = hits.begin(); - vector<BED>::const_iterator hitsEnd = hits.end(); - for (; h != hitsEnd; ++h) { - - // do the actual features overlap? - int s = max(a.start, h->start); - int e = min(a.end, h->end); - int overlapBases = (e - s); // the number of overlapping bases b/w a and b - - // make sure we allow overlapping features. - if ((overlapBases > 0) && (_ignoreOverlaps == true)) - continue; - else - numOverlaps++; - - // there is overlap. make sure we allow overlapping features () - if (overlapBases > 0) { - closestB.push_back(*h); - distances.push_back(0); - } - // the hit is to the "left" of A - else if (h->end <= a.start) { - curDistance = a.start - h->end; - if (_signDistance) { - if ((_strandedDistMode == "ref") - || (_strandedDistMode == "a" && a.strand != "-") - || (_strandedDistMode == "b" && h->strand == "-")) { - curDistance = -curDistance; - } - } - - if (abs(curDistance) < minDistance) { - minDistance = abs(curDistance); - - closestB.clear(); - closestB.push_back(*h); - distances.clear(); - distances.push_back(curDistance); - } - else if (abs(curDistance) == minDistance) { - minDistance = abs(curDistance); - closestB.push_back(*h); - distances.push_back(curDistance); - } - } - // the hit is to the "right" of A - else if (h->start >= a.end) { - curDistance = h->start - a.end; - if (_signDistance) { - if ((_strandedDistMode == "a" && a.strand == "-") - || (_strandedDistMode == "b" && h->strand != "-")) { - curDistance = -curDistance; - } - } - if (abs(curDistance) < minDistance) { - minDistance = abs(curDistance); - closestB.clear(); - closestB.push_back(*h); - distances.clear(); - distances.push_back(curDistance); - } - else if (abs(curDistance) == minDistance) { - minDistance = abs(curDistance); - closestB.push_back(*h); - distances.push_back(curDistance); - } - } - } - // if no overlaps were found, we'll widen the range - // by SLOPGROWTH in each direction and search again. - slop += SLOPGROWTH; - } - } - // there is no feature in B on the same chromosome as A - else { - _bedA->reportBedTab(a); - if (_reportDistance == true) { - _bedB->reportNullBedTab(); - cout << -1 << endl; - } - else - _bedB->reportNullBedNewLine(); - } - - // report the closest feature(s) in B to the current A feature. - // obey the user's reporting request (_tieMode) - if (numOverlaps > 0) { - if (closestB.size() == 1 || _tieMode == "first") { - _bedA->reportBedTab(a); - if (_reportDistance == true) { - _bedB->reportBedTab(closestB[0]); - cout << distances[0] << endl; - } - else - _bedB->reportBedNewLine(closestB[0]); - } - else { - if (_tieMode == "all") { - size_t i = 0; - for (vector<BED>::iterator b = closestB.begin(); b != closestB.end(); ++b) { - _bedA->reportBedTab(a); - if (_reportDistance == true) { - _bedB->reportBedTab(*b); - cout << distances[i++] <<endl; - } - else - _bedB->reportBedNewLine(*b); - } - } - else if (_tieMode == "last") { - _bedA->reportBedTab(a); - if (_reportDistance == true) { - _bedB->reportBedTab(closestB[closestB.size()-1]); - cout << distances[distances.size() - 1]<<endl; - } - else - _bedB->reportBedNewLine(closestB[closestB.size()-1]); - } - } - } -} - - -void BedClosest::FindClosestBed() { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bedB->loadBedFileIntoMap(); - - BED a, nullBed; - int lineNum = 0; // current input line number - vector<BED> hits; // vector of potential hits - hits.reserve(100); - BedLineStatus bedStatus; - - _bedA->Open(); - // process each entry in A in search of the closest feature in B - while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { - FindWindowOverlaps(a, hits); - hits.clear(); - a = nullBed; - } - } - _bedA->Close(); -} -// END ClosestBed -