Mercurial > repos > aaronquinlan > multi_intersect
view BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
line wrap: on
line source
/***************************************************************************** chromsweep.cpp (c) 2009 - Aaron Quinlan Hall Laboratory Department of Biochemistry and Molecular Genetics University of Virginia aaronquinlan@gmail.com Licenced under the GNU General Public License 2.0 license. ******************************************************************************/ #include "lineFileUtilities.h" #include "chromsweep.h" #include <queue> bool after(const BED &a, const BED &b); void report_hits(const BED &curr_qy, const vector<BED> &hits); vector<BED> scan_cache(const BED &curr_qy, BedLineStatus qy_status, const vector<BED> &db_cache, vector<BED> &hits); /* // constructor using existing BedFile pointers */ ChromSweep::ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand, bool diffStrand) : _bedA(bedA) , _bedB(bedB) , _sameStrand(sameStrand) , _diffStrand(diffStrand) { // prime the results pump. _qy_lineNum = 0; _db_lineNum = 0; _hits.reserve(1000); _cache.reserve(1000); _bedA->Open(); _bedB->Open(); _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); } /* Constructor with filenames */ ChromSweep::ChromSweep(string &bedAFile, string &bedBFile) { // prime the results pump. _qy_lineNum = 0; _db_lineNum = 0; _hits.reserve(100000); _cache.reserve(100000); _bedA = new BedFile(bedAFile); _bedB = new BedFile(bedBFile); _bedA->Open(); _bedB->Open(); _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); } /* Destructor */ ChromSweep::~ChromSweep(void) { } void ChromSweep::ScanCache() { if (_qy_status != BED_INVALID) { vector<BED>::iterator c = _cache.begin(); while (c != _cache.end()) { if ((_curr_qy.chrom == c->chrom) && !(after(_curr_qy, *c))) { if (IsValidHit(_curr_qy, *c)) { _hits.push_back(*c); } ++c; } else { c = _cache.erase(c); } } } } bool ChromSweep::ChromChange() { // the files are on the same chrom if ((_curr_qy.chrom == _curr_db.chrom) || (_db_status == BED_INVALID) || (_qy_status == BED_INVALID)) { return false; } // the query is ahead of the database. fast-forward the database to catch-up. else if (_curr_qy.chrom > _curr_db.chrom) { while (!_bedB->Empty() && _curr_db.chrom < _curr_qy.chrom) { _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); } _cache.clear(); return false; } // the database is ahead of the query. else { // 1. scan the cache for remaining hits on the query's current chrom. if (_curr_qy.chrom == _curr_chrom) { ScanCache(); _results.push(make_pair(_curr_qy, _hits)); _hits.clear(); } // 2. fast-forward until we catch up and report 0 hits until we do. else if (_curr_qy.chrom < _curr_db.chrom) { _results.push(make_pair(_curr_qy, _no_hits)); _cache.clear(); } _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); _curr_chrom = _curr_qy.chrom; return true; } } bool ChromSweep::IsValidHit(const BED &query, const BED &db) { // do we have an overlap in the DB? if (overlaps(query.start, query.end, db.start, db.end) > 0) { // Now test for necessary strandedness. bool strands_are_same = (query.strand == db.strand); if ( (_sameStrand == false && _diffStrand == false) || (_sameStrand == true && strands_are_same == true) || (_diffStrand == true && strands_are_same == false) ) { return true; } } return false; } bool ChromSweep::Next(pair<BED, vector<BED> > &next) { if (!_bedA->Empty()) { // have we changed chromosomes? if (ChromChange() == false) { // scan the database cache for hits ScanCache(); // advance the db until we are ahead of the query. update hits and cache as necessary while (!_bedB->Empty() && _curr_qy.chrom == _curr_db.chrom && !(after(_curr_db, _curr_qy))) { if (IsValidHit(_curr_qy, _curr_db)) { _hits.push_back(_curr_db); } _cache.push_back(_curr_db); _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); } // add the hits for this query to the pump _results.push(make_pair(_curr_qy, _hits)); // reset for the next query _hits.clear(); _curr_qy = _nullBed; _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); _curr_chrom = _curr_qy.chrom; } } // report the next set if hits if there are still overlaps in the pump if (!_results.empty()) { next = _results.front(); _results.pop(); return true; } // otherwise, the party is over. else {return false;} }