diff BEDTools-Version-2.14.3/src/intersectBed/intersectBed.cpp @ 1:bec36315bd12 default tip

Deleted selected files
author aaronquinlan
date Sat, 19 Nov 2011 14:17:03 -0500
parents dfcd8b6c1bda
children
line wrap: on
line diff
--- a/BEDTools-Version-2.14.3/src/intersectBed/intersectBed.cpp	Thu Nov 03 10:25:04 2011 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,367 +0,0 @@
-/*****************************************************************************
-  intersectBed.cpp
-
-  (c) 2009 - Aaron Quinlan
-  Hall Laboratory
-  Department of Biochemistry and Molecular Genetics
-  University of Virginia
-  aaronquinlan@gmail.com
-
-  Licenced under the GNU General Public License 2.0 license.
-******************************************************************************/
-#include "lineFileUtilities.h"
-#include "intersectBed.h"
-
-/************************************
-Helper functions
-************************************/
-bool BedIntersect::processHits(const BED &a, const vector<BED> &hits) {
-
-    // how many overlaps are there b/w the bed and the set of hits?
-    CHRPOS s, e;
-    int overlapBases;
-    int  numOverlaps = 0;
-    bool hitsFound   = false;
-    int aLength      = (a.end - a.start);   // the length of a in b.p.
-
-    // loop through the hits and report those that meet the user's criteria
-    vector<BED>::const_iterator h       = hits.begin();
-    vector<BED>::const_iterator hitsEnd = hits.end();
-    for (; h != hitsEnd; ++h) {
-        s            = max(a.start, h->start);
-        e            = min(a.end, h->end);
-        overlapBases = (e - s);             // the number of overlapping bases b/w a and b
-
-        // is there enough overlap relative to the user's request? (default ~ 1bp)
-        if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {
-            // Report the hit if the user doesn't care about reciprocal overlap between A and B.
-            if (_reciprocal == false) {
-                hitsFound = true;
-                numOverlaps++;
-                if (_printable == true)
-                    ReportOverlapDetail(overlapBases, a, *h, s, e);
-            }
-            // we require there to be sufficient __reciprocal__ overlap
-            else {
-                int bLength    = (h->end - h->start);
-                float bOverlap = ( (float) overlapBases / (float) bLength );
-                if (bOverlap >= _overlapFraction) {
-                    hitsFound = true;
-                    numOverlaps++;
-                    if (_printable == true)
-                        ReportOverlapDetail(overlapBases, a, *h, s, e);
-                }
-            }
-        }
-    }
-    // report the summary of the overlaps if requested.
-    ReportOverlapSummary(a, numOverlaps);
-    // were hits found for this BED feature?
-    return hitsFound;
-}
-
-
-/*
-    Constructor
-*/
-BedIntersect::BedIntersect(string bedAFile, string bedBFile, bool anyHit,
-                           bool writeA, bool writeB, bool writeOverlap, bool writeAllOverlap,
-                           float overlapFraction, bool noHit, bool writeCount, bool sameStrand, bool diffStrand,
-                           bool reciprocal, bool obeySplits, bool bamInput, bool bamOutput, bool isUncompressedBam,
-                           bool sortedInput) {
-
-    _bedAFile            = bedAFile;
-    _bedBFile            = bedBFile;
-    _anyHit              = anyHit;
-    _noHit               = noHit;
-    _writeA              = writeA;
-    _writeB              = writeB;
-    _writeOverlap        = writeOverlap;
-    _writeAllOverlap     = writeAllOverlap;
-    _writeCount          = writeCount;
-    _overlapFraction     = overlapFraction;
-    _sameStrand          = sameStrand;
-    _diffStrand          = diffStrand;
-    _reciprocal          = reciprocal;
-    _obeySplits          = obeySplits;
-    _bamInput            = bamInput;
-    _bamOutput           = bamOutput;
-    _isUncompressedBam   = isUncompressedBam;
-    _sortedInput         = sortedInput;
-
-    // should we print each overlap, or does the user want summary information?
-    _printable = true;
-    if (_anyHit || _noHit || _writeCount)
-        _printable = false;
-        
-    if (_bamInput == false)
-        IntersectBed();
-    else
-        IntersectBam(bedAFile);
-}
-
-
-/*
-    Destructor
-*/
-BedIntersect::~BedIntersect(void) {
-}
-
-
-bool BedIntersect::FindOverlaps(const BED &a, vector<BED> &hits) {
-    bool hitsFound = false;
-    
-    // collect and report the sufficient hits
-    _bedB->FindOverlapsPerBin(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand);
-    hitsFound = processHits(a, hits);
-    return hitsFound;
-}
-
-
-void BedIntersect::ReportOverlapDetail(int overlapBases, const BED &a, const BED &b, CHRPOS s, CHRPOS e) {
-    // default. simple intersection only
-    if (_writeA == false && _writeB == false && _writeOverlap == false) {
-        _bedA->reportBedRangeNewLine(a,s,e);
-    }
-    //  -wa -wb write the original A and B
-    else if (_writeA == true && _writeB == true) {
-        _bedA->reportBedTab(a);
-        _bedB->reportBedNewLine(b);
-    }
-    // -wa write just the original A
-    else if (_writeA == true) {
-        _bedA->reportBedNewLine(a);
-    }
-    // -wb write the intersected portion of A and the original B
-    else if (_writeB == true) {
-        _bedA->reportBedRangeTab(a,s,e);
-        _bedB->reportBedNewLine(b);
-    }
-    // -wo write the original A and B plus the no. of overlapping bases.
-    else if (_writeOverlap == true) {
-        _bedA->reportBedTab(a);
-        _bedB->reportBedTab(b);
-        if (b.zeroLength == false) printf("%d\n", overlapBases);
-        else printf("0\n");
-    }
-}
-
-
-void BedIntersect::ReportOverlapSummary(const BED &a, const int &numOverlapsFound) {
-    // -u  just report the fact that there was >= 1 overlaps
-    if (_anyHit && (numOverlapsFound >= 1)) {
-        _bedA->reportBedNewLine(a);
-    }
-    // -c  report the total number of features overlapped in B
-    else if (_writeCount) {
-        _bedA->reportBedTab(a);
-        printf("%d\n", numOverlapsFound);
-    }
-    // -v  report iff there were no overlaps
-    else if (_noHit && (numOverlapsFound == 0)) {
-        _bedA->reportBedNewLine(a);
-    }
-    // -wao the user wants to force the reporting of 0 overlap
-    else if (_writeAllOverlap && (numOverlapsFound == 0)) {
-        _bedA->reportBedTab(a);
-        _bedB->reportNullBedTab();
-        printf("0\n");
-    }
-}
-
-
-bool BedIntersect::FindOneOrMoreOverlap(const BED &a) {
-    bool overlapsFound = false;
-    if (_reciprocal == false) {
-        overlapsFound = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand,
-                                                          _sameStrand, _diffStrand, _overlapFraction);
-    }
-    else {
-        overlapsFound = _bedB->FindOneOrMoreReciprocalOverlapsPerBin(a.chrom, a.start, a.end, a.strand,
-                                                                    _sameStrand, _diffStrand, _overlapFraction);
-    }
-    return overlapsFound;
-}
-
-
-void BedIntersect::IntersectBed() {
-
-    // create new BED file objects for A and B
-    _bedA = new BedFile(_bedAFile);
-    _bedB = new BedFile(_bedBFile);
-
-    if (_sortedInput == false) {
-        // load the "B" file into a map in order to
-        // compare each entry in A to it in search of overlaps.
-        _bedB->loadBedFileIntoMap();
-
-        int lineNum = 0;
-        vector<BED> hits;
-        hits.reserve(100);
-        BED a, nullBed;
-        BedLineStatus bedStatus;
-
-        // open the "A" file, process each BED entry and searh for overlaps.
-        _bedA->Open();
-        while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {
-            if (bedStatus == BED_VALID) {
-                // treat the BED as a single "block"
-                if (_obeySplits == false) {
-                    FindOverlaps(a, hits);
-                    hits.clear();
-                }
-                // split the BED12 into blocks and look for overlaps in each discrete block
-                else {
-                    bedVector bedBlocks;  // vec to store the discrete BED "blocks"
-                    splitBedIntoBlocks(a, lineNum, bedBlocks);
-
-                    vector<BED>::const_iterator bedItr  = bedBlocks.begin();
-                    vector<BED>::const_iterator bedEnd  = bedBlocks.end();
-                    for (; bedItr != bedEnd; ++bedItr) {
-                        FindOverlaps(*bedItr, hits);
-                        hits.clear();
-                    }
-                }
-            }
-            a = nullBed;
-        }
-        _bedA->Close();
-    }
-    else {
-        // use the chromsweep algorithm to detect overlaps on the fly.
-        ChromSweep sweep = ChromSweep(_bedA, _bedB, _sameStrand, _diffStrand);
-
-        pair<BED, vector<BED> > hit_set;
-        hit_set.second.reserve(10000);
-        while (sweep.Next(hit_set)) {
-            processHits(hit_set.first, hit_set.second);
-        }
-    }
-}
-
-
-void BedIntersect::IntersectBam(string bamFile) {
-
-    // load the "B" bed file into a map so
-    // that we can easily compare "A" to it for overlaps
-    _bedB = new BedFile(_bedBFile);
-    _bedB->loadBedFileIntoMap();
-
-    // create a dummy BED A file for printing purposes if not
-    // using BAM output.
-    if (_bamOutput == false) {
-        _bedA = new BedFile(_bedAFile);
-        _bedA->bedType = 6;
-    }
-
-    // open the BAM file
-    BamReader reader;
-    BamWriter writer;
-    
-    reader.Open(bamFile);
-
-    // get header & reference information
-    string bamHeader  = reader.GetHeaderText();
-    RefVector refs    = reader.GetReferenceData();
-
-    // open a BAM output to stdout if we are writing BAM
-    if (_bamOutput == true) {
-        // set compression mode
-        BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
-        if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
-        writer.SetCompressionMode(compressionMode);
-        // open our BAM writer
-        writer.Open("stdout", bamHeader, refs);
-    }
-
-    vector<BED> hits;
-    // reserve some space
-    hits.reserve(100);
-
-    
-    BamAlignment bam;    
-    // get each set of alignments for each pair.
-    while (reader.GetNextAlignment(bam)) {
-
-        if (bam.IsMapped()) {
-            BED a;
-            a.chrom = refs.at(bam.RefID).RefName;
-            a.start = bam.Position;
-            a.end   = bam.GetEndPosition(false, false);
-
-            // build the name field from the BAM alignment.
-            a.name = bam.Name;
-            if (bam.IsFirstMate()) a.name += "/1";
-            if (bam.IsSecondMate()) a.name += "/2";
-
-            a.score  = ToString(bam.MapQuality);
-
-            a.strand = "+";
-            if (bam.IsReverseStrand()) a.strand = "-";
-
-            if (_bamOutput == true) {
-                bool overlapsFound = false;
-                // treat the BAM alignment as a single "block"
-                if (_obeySplits == false) {
-                    overlapsFound = FindOneOrMoreOverlap(a);
-                }
-                // split the BAM alignment into discrete blocks and
-                // look for overlaps only within each block.
-                else {
-                    bool overlapFoundForBlock;
-                    bedVector bedBlocks;  // vec to store the discrete BED "blocks" from a
-                    // we don't want to split on "D" ops, hence the "false"
-                    getBamBlocks(bam, refs, bedBlocks, false);
-
-                    vector<BED>::const_iterator bedItr  = bedBlocks.begin();
-                    vector<BED>::const_iterator bedEnd  = bedBlocks.end();
-                    for (; bedItr != bedEnd; ++bedItr) {
-                        overlapFoundForBlock = FindOneOrMoreOverlap(*bedItr);
-                        if (overlapFoundForBlock == true)
-                            overlapsFound = true;
-                    }
-                }
-                if (overlapsFound == true) {
-                    if (_noHit == false)
-                        writer.SaveAlignment(bam);
-                }
-                else {
-                    if (_noHit == true) {
-                        writer.SaveAlignment(bam);
-                    }
-                }
-            }
-            else {
-                // treat the BAM alignment as a single BED "block"
-                if (_obeySplits == false) {
-                    FindOverlaps(a, hits);
-                    hits.clear();
-                }
-                // split the BAM alignment into discrete BED blocks and
-                // look for overlaps only within each block.
-                else {
-                    bedVector bedBlocks;  // vec to store the discrete BED "blocks" from a
-                    getBamBlocks(bam, refs, bedBlocks, false);
-
-                    vector<BED>::const_iterator bedItr  = bedBlocks.begin();
-                    vector<BED>::const_iterator bedEnd  = bedBlocks.end();
-                    for (; bedItr != bedEnd; ++bedItr) {
-                        FindOverlaps(*bedItr, hits);
-                        hits.clear();
-                    }
-                }
-            }
-        }
-        // BAM IsMapped() is false
-        else if (_noHit == true) {
-            writer.SaveAlignment(bam);
-        }
-    }
-
-    // close the relevant BAM files.
-    reader.Close();
-    if (_bamOutput == true) {
-        writer.Close();
-    }
-}
-