diff BEDTools-Version-2.14.3/src/pairToBed/pairToBed.cpp @ 1:bec36315bd12 default tip

Deleted selected files
author aaronquinlan
date Sat, 19 Nov 2011 14:17:03 -0500
parents dfcd8b6c1bda
children
line wrap: on
line diff
--- a/BEDTools-Version-2.14.3/src/pairToBed/pairToBed.cpp	Thu Nov 03 10:25:04 2011 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,525 +0,0 @@
-/*****************************************************************************
-  pairToBed.cpp
-
-  (c) 2009 - Aaron Quinlan
-  Hall Laboratory
-  Department of Biochemistry and Molecular Genetics
-  University of Virginia
-  aaronquinlan@gmail.com
-
-  Licenced under the GNU General Public License 2.0 license.
-******************************************************************************/
-#include "lineFileUtilities.h"
-#include "pairToBed.h"
-
-
-bool IsCorrectMappingForBEDPE (const BamAlignment &bam) {
-
-    if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize > 0) ) {
-        return true;
-    }
-    else if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize == 0) && bam.IsFirstMate() ) {
-        return true;
-    }
-    else if ( (bam.RefID != bam.MateRefID) && bam.IsFirstMate() ) {
-        return true;
-    }
-    else return false;
-}
-
-
-/*
-    Constructor
-*/
-
-
-BedIntersectPE::BedIntersectPE(string bedAFilePE, string bedBFile, float overlapFraction,
-                               string searchType, bool sameStrand, bool diffStrand, bool bamInput,
-                               bool bamOutput,  bool uncompressedBam, bool useEditDistance) {
-
-    _bedAFilePE        = bedAFilePE;
-    _bedBFile          = bedBFile;
-    _overlapFraction   = overlapFraction;
-    _sameStrand        = sameStrand;
-    _diffStrand        = diffStrand;
-    _useEditDistance   = useEditDistance;
-    _searchType        = searchType;
-    _bamInput          = bamInput;
-    _bamOutput         = bamOutput;
-    _isUncompressedBam = uncompressedBam;
-
-    _bedA = new BedFilePE(bedAFilePE);
-    _bedB = new BedFile(bedBFile);
-
-    if (_bamInput == false)
-        IntersectBedPE();
-    else
-        IntersectBamPE(_bedAFilePE);
-}
-
-
-/*
-    Destructor
-*/
-
-BedIntersectPE::~BedIntersectPE(void) {
-}
-
-
-
-void BedIntersectPE::FindOverlaps(const BEDPE &a, vector<BED> &hits1, vector<BED> &hits2, const string &type) {
-
-    // list of hits on each end of BEDPE
-    // that exceed the requested overlap fraction
-    vector<BED> qualityHits1;
-    vector<BED> qualityHits2;
-
-    // count of hits on each end of BEDPE
-    // that exceed the requested overlap fraction
-    int numOverlapsEnd1 = 0;
-    int numOverlapsEnd2 = 0;
-
-    // make sure we have a valid chromosome before we search
-    if (a.chrom1 != ".") {
-        // Find the quality hits between ***end1*** of the BEDPE and the B BED file
-        _bedB->FindOverlapsPerBin(a.chrom1, a.start1, a.end1, a.strand1, hits1, _sameStrand, _diffStrand);
-
-        vector<BED>::const_iterator h = hits1.begin();
-        vector<BED>::const_iterator hitsEnd = hits1.end();
-        for (; h != hitsEnd; ++h) {
-
-            int s = max(a.start1, h->start);
-            int e = min(a.end1, h->end);
-            int overlapBases = (e - s);             // the number of overlapping bases b/w a and b
-            int aLength = (a.end1 - a.start1);      // the length of a in b.p.
-
-            // is there enough overlap relative to the user's request? (default ~ 1bp)
-            if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {
-                numOverlapsEnd1++;
-
-                if (type == "either") {
-                    _bedA->reportBedPETab(a);
-                    _bedB->reportBedNewLine(*h);
-                }
-                else {
-                    qualityHits1.push_back(*h);
-                }
-            }
-        }
-    }
-
-
-    // make sure we have a valid chromosome before we search
-    if (a.chrom2 != ".") {
-        // Now find the quality hits between ***end2*** of the BEDPE and the B BED file
-        _bedB->FindOverlapsPerBin(a.chrom2, a.start2, a.end2, a.strand2, hits2, _sameStrand, _diffStrand);
-
-        vector<BED>::const_iterator h = hits2.begin();
-        vector<BED>::const_iterator hitsEnd = hits2.end();
-        for (; h != hitsEnd; ++h) {
-
-            int s = max(a.start2, h->start);
-            int e = min(a.end2, h->end);
-            int overlapBases = (e - s);             // the number of overlapping bases b/w a and b
-            int aLength = (a.end2 - a.start2);      // the length of a in b.p.
-
-            // is there enough overlap relative to the user's request? (default ~ 1bp)
-            if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {
-                numOverlapsEnd2++;
-
-                if (type == "either") {
-                    _bedA->reportBedPETab(a);
-                    _bedB->reportBedNewLine(*h);
-                }
-                else {
-                    qualityHits2.push_back(*h);
-                }
-            }
-        }
-    }
-
-    // Now report the hits depending on what the user has requested.
-    if (type == "neither") {
-        if ( (numOverlapsEnd1 == 0) && (numOverlapsEnd2 == 0) ) {
-            _bedA->reportBedPENewLine(a);
-        }
-    }
-    else if (type == "notboth") {
-        if ( (numOverlapsEnd1 == 0) && (numOverlapsEnd2 == 0) ) {
-            _bedA->reportBedPENewLine(a);
-        }
-        else if ( (numOverlapsEnd1 > 0) && (numOverlapsEnd2 == 0) ) {
-            for (vector<BED>::iterator q = qualityHits1.begin(); q != qualityHits1.end(); ++q) {
-                _bedA->reportBedPETab(a);
-                _bedB->reportBedNewLine(*q);
-            }
-        }
-        else if ( (numOverlapsEnd1 == 0) && (numOverlapsEnd2 > 0) ) {
-            for (vector<BED>::iterator q = qualityHits2.begin(); q != qualityHits2.end(); ++q) {
-                _bedA->reportBedPETab(a);
-                _bedB->reportBedNewLine(*q);
-            }
-        }
-    }
-    else if (type == "xor") {
-        if ( (numOverlapsEnd1 > 0) && (numOverlapsEnd2 == 0) ) {
-            for (vector<BED>::iterator q = qualityHits1.begin(); q != qualityHits1.end(); ++q) {
-                _bedA->reportBedPETab(a);
-                _bedB->reportBedNewLine(*q);
-            }
-        }
-        else if ( (numOverlapsEnd1 == 0) && (numOverlapsEnd2 > 0) ) {
-            for (vector<BED>::iterator q = qualityHits2.begin(); q != qualityHits2.end(); ++q) {
-                _bedA->reportBedPETab(a);
-                _bedB->reportBedNewLine(*q);
-            }
-        }
-    }
-    else if (type == "both") {
-        if ( (numOverlapsEnd1 > 0) && (numOverlapsEnd2 > 0) ) {
-            for (vector<BED>::iterator q = qualityHits1.begin(); q != qualityHits1.end(); ++q) {
-                _bedA->reportBedPETab(a);
-                _bedB->reportBedNewLine(*q);
-            }
-            for (vector<BED>::iterator q = qualityHits2.begin(); q != qualityHits2.end(); ++q) {
-                _bedA->reportBedPETab(a);
-                _bedB->reportBedNewLine(*q);
-            }
-        }
-    }
-}
-
-
-bool BedIntersectPE::FindOneOrMoreOverlaps(const BEDPE &a, const string &type) {
-
-    // flags for the existence of hits on each end of BEDPE
-    // that exceed the requested overlap fraction
-    bool end1Found = false;
-    bool end2Found = false;
-
-    // Look for overlaps in end 1 assuming we have an aligned chromosome.
-    if (a.chrom1 != ".") {
-        end1Found = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom1, a.start1, a.end1, a.strand1,
-            _sameStrand, _diffStrand, _overlapFraction);
-
-        // can we bail out without checking end2?
-        if ((type == "either") && (end1Found == true)) return true;
-        else if ((type == "neither") && (end1Found == true)) return false;
-        else if ((type == "notboth") && (end1Found == false)) return true;
-        else if ((type == "both") && (end1Found == false)) return false;
-    }
-
-    // Now look for overlaps in end 2 assuming we have an aligned chromosome.
-    if (a.chrom2 != ".") {
-        end2Found = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom2, a.start2, a.end2, a.strand2,
-            _sameStrand, _diffStrand, _overlapFraction);
-
-        if ((type == "either") && (end2Found == true)) return true;
-        else if ((type == "neither") && (end2Found == true)) return false;
-        else if ((type == "notboth") && (end2Found == false)) return true;
-        else if ((type == "both") && (end2Found == false)) return false;
-    }
-
-    // Now report the hits depending on what the user has requested.
-    if (type == "notboth") {
-        if ( (end1Found == false) || (end2Found == false) ) return true;
-        else return false;
-    }
-    else if (type == "either") {
-        if ( (end1Found == false) && (end2Found == false) ) return false;
-    }
-    else if (type == "neither") {
-        if ( (end1Found == false) && (end2Found == false) ) return true;
-        else return false;
-    }
-    else if (type == "xor") {
-        if ( (end1Found == true) && (end2Found == false) ) return true;
-        else if ( (end1Found == false) && (end2Found == true) ) return true;
-        else return false;
-    }
-    else if (type == "both") {
-        if ( (end1Found == true) && (end2Found == true) ) return true;
-        return false;
-    }
-    return false;
-}
-
-
-void BedIntersectPE::FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, const string &type) {
-
-    // count of hits on _between_ end of BEDPE
-    // that exceed the requested overlap fraction
-    int numOverlaps = 0;
-    CHRPOS spanStart = 0;
-    CHRPOS spanEnd = 0;
-    CHRPOS spanLength = 0;
-
-    if ((type == "ispan") || (type == "notispan")) {
-        spanStart = a.end1;
-        spanEnd = a.start2;
-        if (a.end1 > a.start2) {
-            spanStart = a.end2;
-            spanEnd = a.start1;
-        }
-    }
-    else if ((type == "ospan") || (type == "notospan")) {
-        spanStart = a.start1;
-        spanEnd = a.end2;
-        if (a.start1 > a.start2) {
-            spanStart = a.start2;
-            spanEnd = a.end1;
-        }
-    }
-    spanLength = spanEnd - spanStart;
-
-    // get the hits for the span
-    _bedB->FindOverlapsPerBin(a.chrom1, spanStart, spanEnd, a.strand1, hits, _sameStrand, _diffStrand);
-
-    vector<BED>::const_iterator h = hits.begin();
-    vector<BED>::const_iterator hitsEnd = hits.end();
-    for (; h != hitsEnd; ++h) {
-
-        int s = max(spanStart, h->start);
-        int e = min(spanEnd, h->end);
-        int overlapBases = (e - s);                     // the number of overlapping bases b/w a and b
-        int spanLength = (spanEnd - spanStart);     // the length of a in b.p.
-
-        // is there enough overlap relative to the user's request? (default ~ 1bp)
-        if ( ( (float) overlapBases / (float) spanLength ) >= _overlapFraction ) {
-            numOverlaps++;
-            if ((type == "ispan") || (type == "ospan")) {
-                _bedA->reportBedPETab(a);
-                _bedB->reportBedNewLine(*h);
-            }
-        }
-    }
-
-    if ( ( (type == "notispan") || (type == "notospan") ) && numOverlaps == 0 ) {
-        _bedA->reportBedPENewLine(a);
-    }
-}
-
-
-bool BedIntersectPE::FindOneOrMoreSpanningOverlaps(const BEDPE &a, const string &type) {
-
-    int spanStart = 0;
-    int spanEnd = 0;
-    int spanLength = 0;
-    bool overlapFound;
-
-    if ((type == "ispan") || (type == "notispan")) {
-        spanStart = a.end1;
-        spanEnd = a.start2;
-        if (a.end1 > a.start2) {
-            spanStart = a.end2;
-            spanEnd = a.start1;
-        }
-    }
-    else if ((type == "ospan") || (type == "notospan")) {
-        spanStart = a.start1;
-        spanEnd = a.end2;
-        if (a.start1 > a.start2) {
-            spanStart = a.start2;
-            spanEnd = a.end1;
-        }
-    }
-    spanLength = spanEnd - spanStart;
-
-    overlapFound = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom1, spanStart, spanEnd, a.strand1,
-        _sameStrand, _diffStrand, _overlapFraction);
-
-    return overlapFound;
-}
-
-
-void BedIntersectPE::IntersectBedPE() {
-
-    // load the "B" bed file into a map so
-    // that we can easily compare "A" to it for overlaps
-    _bedB->loadBedFileIntoMap();
-
-    int lineNum = 0;                    // current input line number
-    vector<BED> hits, hits1, hits2;     // vector of potential hits
-
-    // reserve some space
-    hits.reserve(100);
-    hits1.reserve(100);
-    hits2.reserve(100);
-
-    BEDPE a, nullBedPE;
-    BedLineStatus bedStatus;
-
-    _bedA->Open();
-    while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) {
-        if (bedStatus == BED_VALID) {
-            if ( (_searchType == "ispan") || (_searchType == "ospan") ||
-                 (_searchType == "notispan") || (_searchType == "notospan") ) {
-                if (a.chrom1 == a.chrom2) {
-                    FindSpanningOverlaps(a, hits, _searchType);
-                    hits.clear();
-                }
-            }
-            else {
-                FindOverlaps(a, hits1, hits2, _searchType);
-                hits1.clear();
-                hits2.clear();
-            }
-            a = nullBedPE;
-        }
-    }
-    _bedA->Close();
-}
-
-
-void BedIntersectPE::IntersectBamPE(string bamFile) {
-
-    // load the "B" bed file into a map so
-    // that we can easily compare "A" to it for overlaps
-    _bedB->loadBedFileIntoMap();
-
-    // open the BAM file
-    BamReader reader;
-    BamWriter writer;
-    reader.Open(bamFile);
-
-    // get header & reference information
-    string bamHeader = reader.GetHeaderText();
-    RefVector refs   = reader.GetReferenceData();
-
-    // open a BAM output to stdout if we are writing BAM
-    if (_bamOutput == true) {
-        // set compression mode
-        BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
-        if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
-        writer.SetCompressionMode(compressionMode);
-        // open our BAM writer
-        writer.Open("stdout", bamHeader, refs);
-    }
-
-    // track the previous and current sequence
-    // names so that we can identify blocks of
-    // alignments for a given read ID.
-    string prevName, currName;
-    prevName = currName = "";
-
-    vector<BamAlignment> alignments;        // vector of BAM alignments for a given ID in a BAM file.
-    alignments.reserve(100);
-
-    _bedA->bedType = 10;                    // it's a full BEDPE given it's BAM
-
-    // rip through the BAM file and convert each mapped entry to BEDPE
-    BamAlignment bam1, bam2;
-    while (reader.GetNextAlignment(bam1)) {
-        // the alignment must be paired
-        if (bam1.IsPaired() == true) {
-            // grab the second alignment for the pair.
-            reader.GetNextAlignment(bam2);
-
-            // require that the alignments are from the same query
-            if (bam1.Name == bam2.Name) {
-                ProcessBamBlock(bam1, bam2, refs, writer);
-            }
-            else {
-                cerr << "*****ERROR: BAM must be sorted by query name. " << endl;
-                exit(1);
-            }
-        }
-    }
-    // close up
-    reader.Close();
-    if (_bamOutput == true) {
-        writer.Close();
-    }
-}
-
-
-void BedIntersectPE::ProcessBamBlock (const BamAlignment &bam1, const BamAlignment &bam2,
-                                      const RefVector &refs, BamWriter &writer) {
-
-    vector<BED> hits, hits1, hits2;         // vector of potential hits
-    hits.reserve(1000);                     // reserve some space
-    hits1.reserve(1000);
-    hits2.reserve(1000);
-
-    bool overlapsFound;                     // flag to indicate if overlaps were found
-
-    if ( (_searchType == "either") || (_searchType == "xor") ||
-              (_searchType == "both") || (_searchType == "notboth") ||
-              (_searchType == "neither") ) {
-
-        // create a new BEDPE feature from the BAM alignments.
-        BEDPE a;
-        ConvertBamToBedPE(bam1, bam2, refs, a);
-        if (_bamOutput == true) {   // BAM output
-            // write to BAM if correct hits found
-            overlapsFound = FindOneOrMoreOverlaps(a, _searchType);
-            if (overlapsFound == true) {
-                writer.SaveAlignment(bam1);
-                writer.SaveAlignment(bam2);
-            }
-        }
-        else {  // BEDPE output
-            FindOverlaps(a, hits1, hits2, _searchType);
-            hits1.clear();
-            hits2.clear();
-        }
-    }
-    else if ( (_searchType == "ispan") || (_searchType == "ospan") ) {
-        // only look for ispan and ospan when both ends are mapped.
-        if (bam1.IsMapped() && bam2.IsMapped()) {
-            // only do an inspan or outspan check if the alignment is intrachromosomal
-            if (bam1.RefID == bam2.RefID) {
-                // create a new BEDPE feature from the BAM alignments.
-                BEDPE a;
-                ConvertBamToBedPE(bam1, bam2, refs, a);
-                if (_bamOutput == true) {   // BAM output
-                    // look for overlaps, and write to BAM if >=1 were found
-                    overlapsFound = FindOneOrMoreSpanningOverlaps(a, _searchType);
-                    if (overlapsFound == true) {
-                        writer.SaveAlignment(bam1);
-                        writer.SaveAlignment(bam2);
-                    }
-                }
-                else {  // BEDPE output
-                    FindSpanningOverlaps(a, hits, _searchType);
-                    hits.clear();
-                }
-            }
-        }
-    }
-    else if ( (_searchType == "notispan") || (_searchType == "notospan") ) {
-        // only look for notispan and notospan when both ends are mapped.
-        if (bam1.IsMapped() && bam2.IsMapped()) {
-            // only do an inspan or outspan check if the alignment is intrachromosomal
-            if (bam1.RefID == bam2.RefID) {
-                // create a new BEDPE feature from the BAM alignments.
-                BEDPE a;
-                ConvertBamToBedPE(bam1, bam2, refs, a);
-                if (_bamOutput == true) {   // BAM output
-                    // write to BAM if there were no overlaps
-                    overlapsFound = FindOneOrMoreSpanningOverlaps(a, _searchType);
-                    if (overlapsFound == false) {
-                        writer.SaveAlignment(bam1);
-                        writer.SaveAlignment(bam2);
-                    }
-                }
-                else {  // BEDPE output
-                    FindSpanningOverlaps(a, hits, _searchType);
-                    hits.clear();
-                }
-            }
-            // if inter-chromosomal or orphaned, we know it's not ispan and not ospan
-            else if (_bamOutput == true) {
-                writer.SaveAlignment(bam1);
-                writer.SaveAlignment(bam2);
-            }
-        }
-        // if both ends aren't mapped, we know that it's notispan and not ospan
-        else if (_bamOutput == true) {
-            writer.SaveAlignment(bam1);
-            writer.SaveAlignment(bam2);
-        }
-    }
-}
-
-