diff BEDTools-Version-2.14.3/src/closestBed/closestBed.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/closestBed/closestBed.cpp	Thu Nov 03 10:25:04 2011 -0400
@@ -0,0 +1,234 @@
+/*****************************************************************************
+  closestBed.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "closestBed.h"
+
+const int MAXSLOP = 256000000;  // 2*MAXSLOP = 512 megabases.
+                                // We don't want to keep looking if we
+                                // can't find a nearby feature within 512 Mb.
+const int SLOPGROWTH = 2048000;
+
+
+/*
+    Constructor
+*/
+BedClosest::BedClosest(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand,
+                       string &tieMode, bool reportDistance, bool signDistance, string &_strandedDistMode,
+                       bool ignoreOverlaps) 
+    : _bedAFile(bedAFile)
+    , _bedBFile(bedBFile)
+    , _tieMode(tieMode)
+    , _sameStrand(sameStrand)
+    , _diffStrand(diffStrand)
+    , _reportDistance(reportDistance)
+    , _signDistance(signDistance)
+    , _strandedDistMode(_strandedDistMode)
+    , _ignoreOverlaps(ignoreOverlaps)
+{
+    _bedA           = new BedFile(_bedAFile);
+    _bedB           = new BedFile(_bedBFile);
+    FindClosestBed();
+}
+
+
+/*
+    Destructor
+*/
+BedClosest::~BedClosest(void) {
+}
+
+
+void BedClosest::FindWindowOverlaps(BED &a, vector<BED> &hits) {
+
+    int slop = 0;  // start out just looking for overlaps
+                   // within the current bin (~128Kb)
+
+    // update the current feature's start and end
+
+    CHRPOS aFudgeStart = 0;
+    CHRPOS aFudgeEnd;
+    int numOverlaps = 0;
+    vector<BED> closestB;
+    CHRPOS minDistance = INT_MAX;
+    int32_t curDistance = INT_MAX;
+    vector<int32_t> distances;
+
+    // is there at least one feature in B on the same chrom
+    // as the current A feature?
+    if(_bedB->bedMap.find(a.chrom) != _bedB->bedMap.end()) {
+
+        while ((numOverlaps == 0) && (slop <= MAXSLOP)) {
+
+            // add some slop (starting at 0 bases) to a in hopes
+            // of finding a hit in B
+            if ((static_cast<int>(a.start) - slop) > 0)
+                aFudgeStart = a.start - slop;
+            else
+                aFudgeStart = 0;
+
+            if ((static_cast<int>(a.start) + slop) < (2 * MAXSLOP))
+                aFudgeEnd = a.end + slop;
+            else
+                aFudgeEnd = 2 * MAXSLOP;
+
+            // THE HEAVY LIFTING
+            // search for hits with the current slop added
+            _bedB->FindOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, hits, _sameStrand, _diffStrand);
+
+            vector<BED>::const_iterator h = hits.begin();
+            vector<BED>::const_iterator hitsEnd = hits.end();
+            for (; h != hitsEnd; ++h) {
+
+                // do the actual features overlap?
+                int s = max(a.start, h->start);
+                int e = min(a.end, h->end);
+                int overlapBases = (e - s);             // the number of overlapping bases b/w a and b
+
+                // make sure we allow overlapping features.
+                if ((overlapBases > 0) && (_ignoreOverlaps == true))
+                    continue;
+                else
+                    numOverlaps++;
+
+                // there is overlap. make sure we allow overlapping features ()
+                if (overlapBases > 0) {
+                    closestB.push_back(*h);
+                    distances.push_back(0);
+                }
+                // the hit is to the "left" of A
+                else if (h->end <= a.start) {
+                    curDistance = a.start - h->end;
+                    if (_signDistance) {
+                        if ((_strandedDistMode == "ref")
+                                || (_strandedDistMode == "a" && a.strand != "-")
+                                || (_strandedDistMode == "b" && h->strand == "-")) {
+                            curDistance = -curDistance;
+                        }
+                    }
+                    
+                    if (abs(curDistance) < minDistance) {
+                        minDistance = abs(curDistance);
+                        
+                        closestB.clear();
+                        closestB.push_back(*h);
+                        distances.clear();
+                        distances.push_back(curDistance);
+                    }
+                    else if (abs(curDistance) == minDistance) {
+                        minDistance = abs(curDistance);
+                        closestB.push_back(*h);
+                        distances.push_back(curDistance);
+                    }
+                }
+                // the hit is to the "right" of A
+                else if (h->start >= a.end) {
+                    curDistance = h->start - a.end;
+                    if (_signDistance) {
+                        if ((_strandedDistMode == "a" && a.strand == "-")
+                                || (_strandedDistMode == "b" && h->strand != "-")) {
+                            curDistance = -curDistance;
+                        }
+                    }
+                    if (abs(curDistance) < minDistance) {
+                        minDistance = abs(curDistance);
+                        closestB.clear();
+                        closestB.push_back(*h);
+                        distances.clear();
+                        distances.push_back(curDistance);
+                    }
+                    else if (abs(curDistance) == minDistance) {
+                        minDistance = abs(curDistance);
+                        closestB.push_back(*h);
+                        distances.push_back(curDistance);
+                    }
+                }
+            }
+            // if no overlaps were found, we'll widen the range
+            // by SLOPGROWTH in each direction and search again.
+            slop += SLOPGROWTH;
+        }
+    }
+    // there is no feature in B on the same chromosome as A
+    else {
+        _bedA->reportBedTab(a);
+        if (_reportDistance == true) {
+            _bedB->reportNullBedTab();
+            cout << -1 << endl;
+        }
+        else
+            _bedB->reportNullBedNewLine();
+    }
+
+    // report the closest feature(s) in B to the current A feature.
+    // obey the user's reporting request (_tieMode)
+    if (numOverlaps > 0) {
+        if (closestB.size() == 1 || _tieMode == "first") {
+            _bedA->reportBedTab(a);
+            if (_reportDistance == true) {
+                _bedB->reportBedTab(closestB[0]);
+                cout << distances[0] << endl;
+            }
+            else
+                _bedB->reportBedNewLine(closestB[0]);
+        }
+        else {
+            if (_tieMode == "all") {
+                size_t i = 0;
+                for (vector<BED>::iterator b = closestB.begin(); b != closestB.end(); ++b) {
+                    _bedA->reportBedTab(a);
+                    if (_reportDistance == true) {
+                        _bedB->reportBedTab(*b);
+                        cout << distances[i++] <<endl;
+                    }
+                    else
+                        _bedB->reportBedNewLine(*b);
+                }
+            }
+            else if (_tieMode == "last") {
+                _bedA->reportBedTab(a);
+                if (_reportDistance == true) {
+                    _bedB->reportBedTab(closestB[closestB.size()-1]);
+                    cout << distances[distances.size() - 1]<<endl;
+                }
+                else
+                    _bedB->reportBedNewLine(closestB[closestB.size()-1]);
+            }
+        }
+    }
+}
+
+
+void BedClosest::FindClosestBed() {
+
+    // load the "B" bed file into a map so
+    // that we can easily compare "A" to it for overlaps
+    _bedB->loadBedFileIntoMap();
+
+    BED a, nullBed;
+    int lineNum = 0;                    // current input line number
+    vector<BED> hits;                   // vector of potential hits
+    hits.reserve(100);
+    BedLineStatus bedStatus;
+
+    _bedA->Open();
+    // process each entry in A in search of the closest feature in B
+    while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {
+        if (bedStatus == BED_VALID) {
+            FindWindowOverlaps(a, hits);
+            hits.clear();
+            a = nullBed;
+        }
+    }
+    _bedA->Close();
+}
+// END ClosestBed
+