diff BEDTools-Version-2.14.3/src/subtractBed/subtractBed.cpp @ 1:bec36315bd12 default tip

Deleted selected files
author aaronquinlan
date Sat, 19 Nov 2011 14:17:03 -0500
parents dfcd8b6c1bda
children
line wrap: on
line diff
--- a/BEDTools-Version-2.14.3/src/subtractBed/subtractBed.cpp	Thu Nov 03 10:25:04 2011 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,178 +0,0 @@
-/*****************************************************************************
-  subtractBed.cpp
-
-  (c) 2009 - Aaron Quinlan
-  Hall Laboratory
-  Department of Biochemistry and Molecular Genetics
-  University of Virginia
-  aaronquinlan@gmail.com
-
-  Licenced under the GNU General Public License 2.0 license.
-******************************************************************************/
-#include "lineFileUtilities.h"
-#include "subtractBed.h"
-
-
-/*
-    Constructor
-*/
-BedSubtract::BedSubtract(string &bedAFile, string &bedBFile, float overlapFraction, bool sameStrand, bool diffStrand) {
-
-    _bedAFile = bedAFile;
-    _bedBFile = bedBFile;
-    _overlapFraction = overlapFraction;
-    _sameStrand = sameStrand;
-    _diffStrand = diffStrand;
-
-    _bedA = new BedFile(bedAFile);
-    _bedB = new BedFile(bedBFile);
-
-    SubtractBed();
-}
-
-
-/*
-    Destructor
-*/
-BedSubtract::~BedSubtract(void) {
-}
-
-
-void BedSubtract::FindAndSubtractOverlaps(BED &a, vector<BED> &hits) {
-
-    // find all of the overlaps between a and B.
-    _bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
-
-    //  is A completely spanned by an entry in B?
-    //  if so, A should not be reported.
-    int numConsumedByB = 0;
-    int numOverlaps = 0;
-    vector<BED> bOverlaps;  // list of hits in B.  Special processing if there are multiple.
-
-    vector<BED>::const_iterator h = hits.begin();
-    vector<BED>::const_iterator hitsEnd = hits.end();
-    for (; h != hitsEnd; ++h) {
-
-        int s = max(a.start, h->start);
-        int e = min(a.end, h->end);
-        int overlapBases = (e - s);             // the number of overlapping bases b/w a and b
-        int aLength = (a.end - a.start);        // the length of a in b.p.
-
-        if (s < e) {
-
-            // is there enough overlap (default ~ 1bp)
-            float overlap = ((float) overlapBases / (float) aLength);
-
-            if (overlap >= 1.0) {
-                numOverlaps++;
-                numConsumedByB++;
-            }
-            else if ( overlap >= _overlapFraction ) {
-                numOverlaps++;
-                bOverlaps.push_back(*h);
-            }
-        }
-    }
-
-    if (numOverlaps == 0) {
-        // no overlap found, so just report A as-is.
-        _bedA->reportBedNewLine(a);
-    }
-    else if (numOverlaps == 1) {
-        // one overlap found.  only need to look at the single
-        // entry in bOverlaps.
-
-        // if A was not "consumed" by any entry in B
-        if (numConsumedByB == 0) {
-
-            BED theHit = bOverlaps[0];
-
-            // A    ++++++++++++
-            // B        ----
-            // Res. ====    ====
-            if ( (theHit.start > a.start) && (theHit.end < a.end) ) {
-                _bedA->reportBedRangeNewLine(a,a.start,theHit.start);
-                _bedA->reportBedRangeNewLine(a,theHit.end,a.end);
-            }
-            // A    ++++++++++++
-            // B    ----------
-            // Res.           ==
-            else if (theHit.start == a.start) {
-                _bedA->reportBedRangeNewLine(a,theHit.end,a.end);
-            }
-            // A          ++++++++++++
-            // B    ----------
-            // Res.       ====
-            else if (theHit.start < a.start) {
-                _bedA->reportBedRangeNewLine(a,theHit.end,a.end);
-            }
-            // A    ++++++++++++
-            // B           ----------
-            // Res. =======
-            else if (theHit.start > a.start) {
-                _bedA->reportBedRangeNewLine(a,a.start,theHit.start);
-            }
-        }
-    }
-    else if (numOverlaps > 1) {
-        // multiple overlapz found.  look at all the hits
-        // and figure out which bases in A survived.  then
-        // report the contigous intervals that survived.
-
-        vector<bool> aKeep(a.end - a.start, true);
-
-        if (numConsumedByB == 0) {
-            // track the number of hit starts and ends at each position in A
-            for (vector<BED>::iterator h = bOverlaps.begin(); h != bOverlaps.end(); ++h) {
-                int s = max(a.start, h->start);
-                int e = min(a.end, h->end);
-
-                for (int i = s+1; i <= e; ++i) {
-                    aKeep[i-a.start-1] = false;
-                }
-            }
-            // report the remaining blocks.
-            for (unsigned int i = 0; i < aKeep.size(); ++i) {
-                if (aKeep[i] == true) {
-                    CHRPOS blockStart = i + a.start;
-                    while ((aKeep[i] == true) && (i < aKeep.size())) {
-                        i++;
-                    }
-                    CHRPOS blockEnd = i + a.start;
-                    blockEnd = min(a.end, blockEnd);
-                    _bedA->reportBedRangeNewLine(a,blockStart,blockEnd);
-                }
-            }
-        }
-    }
-}
-
-
-
-void BedSubtract::SubtractBed() {
-
-    // load the "B" bed file into a map so
-    // that we can easily compare "A" to it for overlaps
-    _bedB->loadBedFileIntoMap();
-
-    BED a, nullBed;
-    BedLineStatus bedStatus;
-    int lineNum = 0;                    // current input line number
-    vector<BED> hits;                   // vector of potential hits
-    // reserve some space
-    hits.reserve(100);
-
-    _bedA->Open();
-    while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {
-        if (bedStatus == BED_VALID) {
-            FindAndSubtractOverlaps(a, hits);
-            hits.clear();
-            a = nullBed;
-        }
-    }
-    _bedA->Close();
-
-}
-// END Intersect
-
-