diff BEDTools-Version-2.14.3/src/windowBed/windowBed.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/windowBed/windowBed.cpp	Thu Nov 03 10:25:04 2011 -0400
@@ -0,0 +1,253 @@
+/*****************************************************************************
+  windowBed.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "windowBed.h"
+
+
+/*
+    Constructor
+*/
+BedWindow::BedWindow(string bedAFile, string bedBFile, int leftSlop, int rightSlop,
+                     bool anyHit, bool noHit, bool writeCount, bool strandWindows,
+                     bool matchOnSameStrand, bool matchOnDiffStrand, bool bamInput, bool bamOutput, bool isUncompressedBam) {
+
+    _bedAFile      = bedAFile;
+    _bedBFile      = bedBFile;
+
+    _leftSlop      = leftSlop;
+    _rightSlop     = rightSlop;
+
+    _anyHit              = anyHit;
+    _noHit               = noHit;
+    _writeCount          = writeCount;
+    _strandWindows       = strandWindows;
+    _matchOnSameStrand   = matchOnSameStrand;
+    _matchOnDiffStrand   = matchOnDiffStrand;
+    _bamInput            = bamInput;
+    _bamOutput           = bamOutput;
+    _isUncompressedBam   = isUncompressedBam;
+
+    _bedA          = new BedFile(bedAFile);
+    _bedB          = new BedFile(bedBFile);
+
+    if (_bamInput == false)
+        WindowIntersectBed();
+    else
+        WindowIntersectBam(_bedAFile);
+}
+
+
+
+/*
+    Destructor
+*/
+BedWindow::~BedWindow(void) {
+}
+
+
+
+void BedWindow::FindWindowOverlaps(const BED &a, vector<BED> &hits) {
+
+    /*
+        Adjust the start and end of a based on the requested window
+    */
+
+    // update the current feature's start and end
+    // according to the slop requested (slop = 0 by default)
+    CHRPOS aFudgeStart = 0;
+    CHRPOS aFudgeEnd;
+    AddWindow(a, aFudgeStart, aFudgeEnd);
+
+    /*
+        Now report the hits (if any) based on the window around a.
+    */
+    // get the hits in B for the A feature
+    _bedB->FindOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, hits, _matchOnSameStrand, _matchOnDiffStrand);
+
+    int numOverlaps = 0;
+
+    // loop through the hits and report those that meet the user's criteria
+    vector<BED>::const_iterator h = hits.begin();
+    vector<BED>::const_iterator hitsEnd = hits.end();
+    for (; h != hitsEnd; ++h) {
+
+        int s = max(aFudgeStart, h->start);
+        int e = min(aFudgeEnd, h->end);
+        int overlapBases = (e - s);             // the number of overlapping bases b/w a and b
+        int aLength = (a.end - a.start);        // the length of a in b.p.
+
+        if (s < e) {
+            // is there enough overlap (default ~ 1bp)
+            if ( ((float) overlapBases / (float) aLength) > 0 ) {
+                numOverlaps++;
+                if (_anyHit == false && _noHit == false && _writeCount == false) {
+                    _bedA->reportBedTab(a);
+                    _bedB->reportBedNewLine(*h);
+                }
+            }
+        }
+    }
+    if (_anyHit == true && (numOverlaps >= 1)) {
+        _bedA->reportBedNewLine(a); }
+    else if (_writeCount == true) {
+        _bedA->reportBedTab(a); printf("\t%d\n", numOverlaps);
+    }
+    else if (_noHit == true && (numOverlaps == 0)) {
+        _bedA->reportBedNewLine(a);
+    }
+}
+
+
+bool BedWindow::FindOneOrMoreWindowOverlaps(const BED &a) {
+
+    // update the current feature's start and end
+    // according to the slop requested (slop = 0 by default)
+    CHRPOS aFudgeStart = 0;
+    CHRPOS aFudgeEnd;
+    AddWindow(a, aFudgeStart, aFudgeEnd);
+
+    bool overlapsFound = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _matchOnSameStrand, _matchOnDiffStrand);
+    return overlapsFound;
+}
+
+
+void BedWindow::WindowIntersectBed() {
+
+    // load the "B" bed file into a map so
+    // that we can easily compare "A" to it for overlaps
+    _bedB->loadBedFileIntoMap();
+
+    BED a, nullBed;
+    int lineNum = 0;                    // current input line number
+    BedLineStatus bedStatus;
+    vector<BED> hits;                   // vector of potential hits
+    hits.reserve(100);
+
+    _bedA->Open();
+    while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {
+        if (bedStatus == BED_VALID) {
+            FindWindowOverlaps(a, hits);
+            hits.clear();
+            a = nullBed;
+        }
+    }
+    _bedA->Close();
+}
+
+
+void BedWindow::WindowIntersectBam(string bamFile) {
+
+    // load the "B" bed file into a map so
+    // that we can easily compare "A" to it for overlaps
+    _bedB->loadBedFileIntoMap();
+
+    // open the BAM file
+    BamReader reader;
+    BamWriter writer;
+    reader.Open(bamFile);
+
+    // get header & reference information
+    string bamHeader  = reader.GetHeaderText();
+    RefVector refs    = reader.GetReferenceData();
+
+    // open a BAM output to stdout if we are writing BAM
+    if (_bamOutput == true) {
+        // set compression mode
+        BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
+        if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
+        writer.SetCompressionMode(compressionMode);
+        // open our BAM writer
+        writer.Open("stdout", bamHeader, refs);
+    }
+
+    vector<BED> hits;                   // vector of potential hits
+    // reserve some space
+    hits.reserve(100);
+
+    _bedA->bedType = 6;
+    BamAlignment bam;
+    bool overlapsFound;
+    // get each set of alignments for each pair.
+    while (reader.GetNextAlignment(bam)) {
+
+        if (bam.IsMapped()) {
+            BED a;
+            a.chrom = refs.at(bam.RefID).RefName;
+            a.start = bam.Position;
+            a.end   = bam.GetEndPosition(false, false);
+
+            // build the name field from the BAM alignment.
+            a.name = bam.Name;
+            if (bam.IsFirstMate()) a.name += "/1";
+            if (bam.IsSecondMate()) a.name += "/2";
+
+            a.score  = ToString(bam.MapQuality);
+            a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-";
+
+            if (_bamOutput == true) {
+                overlapsFound = FindOneOrMoreWindowOverlaps(a);
+                if (overlapsFound == true) {
+                    if (_noHit == false)
+                        writer.SaveAlignment(bam);
+                }
+                else {
+                    if (_noHit == true)
+                        writer.SaveAlignment(bam);
+                }
+            }
+            else {
+                FindWindowOverlaps(a, hits);
+                hits.clear();
+            }
+        }
+        // BAM IsMapped() is false
+        else if (_noHit == true) {
+            writer.SaveAlignment(bam);
+        }
+    }
+
+    // close the relevant BAM files.
+    reader.Close();
+    if (_bamOutput == true) {
+        writer.Close();
+    }
+}
+
+
+void BedWindow::AddWindow(const BED &a, CHRPOS &fudgeStart, CHRPOS &fudgeEnd) {
+    // Does the user want to treat the windows based on strand?
+    // If so,
+    // if "+", then left is left and right is right
+    // if "-", the left is right and right is left.
+    if (_strandWindows) {
+        if (a.strand == "+") {
+            if ((int) (a.start - _leftSlop) > 0) 
+                fudgeStart = a.start - _leftSlop;
+            else fudgeStart = 0;
+            fudgeEnd = a.end + _rightSlop;
+        }
+        else {
+            if ((int) (a.start - _rightSlop) > 0) 
+                fudgeStart = a.start - _rightSlop;
+            else fudgeStart = 0;
+            fudgeEnd = a.end + _leftSlop;
+        }
+    }
+    // If not, add the windows irrespective of strand
+    else {
+        if ((int) (a.start - _leftSlop) > 0) 
+            fudgeStart = a.start - _leftSlop;
+        else fudgeStart = 0;
+        fudgeEnd = a.end + _rightSlop;
+    }
+}
+