Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp Thu Nov 03 10:25:04 2011 -0400 @@ -0,0 +1,180 @@ +/***************************************************************************** + chromsweep.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "chromsweep.h" +#include <queue> + +bool after(const BED &a, const BED &b); +void report_hits(const BED &curr_qy, const vector<BED> &hits); +vector<BED> scan_cache(const BED &curr_qy, BedLineStatus qy_status, const vector<BED> &db_cache, vector<BED> &hits); + + +/* + // constructor using existing BedFile pointers +*/ +ChromSweep::ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand, bool diffStrand) +: _bedA(bedA) +, _bedB(bedB) +, _sameStrand(sameStrand) +, _diffStrand(diffStrand) +{ + // prime the results pump. + _qy_lineNum = 0; + _db_lineNum = 0; + + _hits.reserve(1000); + _cache.reserve(1000); + + _bedA->Open(); + _bedB->Open(); + _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); + _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); +} + +/* + Constructor with filenames +*/ +ChromSweep::ChromSweep(string &bedAFile, string &bedBFile) +{ + // prime the results pump. + _qy_lineNum = 0; + _db_lineNum = 0; + + _hits.reserve(100000); + _cache.reserve(100000); + + _bedA = new BedFile(bedAFile); + _bedB = new BedFile(bedBFile); + + _bedA->Open(); + _bedB->Open(); + + _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); + _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); +} + + +/* + Destructor +*/ +ChromSweep::~ChromSweep(void) { +} + + +void ChromSweep::ScanCache() { + if (_qy_status != BED_INVALID) { + vector<BED>::iterator c = _cache.begin(); + while (c != _cache.end()) + { + if ((_curr_qy.chrom == c->chrom) && !(after(_curr_qy, *c))) { + if (IsValidHit(_curr_qy, *c)) { + _hits.push_back(*c); + } + ++c; + } + else { + c = _cache.erase(c); + } + } + } +} + + +bool ChromSweep::ChromChange() +{ + // the files are on the same chrom + if ((_curr_qy.chrom == _curr_db.chrom) || (_db_status == BED_INVALID) || (_qy_status == BED_INVALID)) { + return false; + } + // the query is ahead of the database. fast-forward the database to catch-up. + else if (_curr_qy.chrom > _curr_db.chrom) { + while (!_bedB->Empty() && _curr_db.chrom < _curr_qy.chrom) + { + _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); + } + _cache.clear(); + return false; + } + // the database is ahead of the query. + else { + // 1. scan the cache for remaining hits on the query's current chrom. + if (_curr_qy.chrom == _curr_chrom) + { + ScanCache(); + _results.push(make_pair(_curr_qy, _hits)); + _hits.clear(); + } + // 2. fast-forward until we catch up and report 0 hits until we do. + else if (_curr_qy.chrom < _curr_db.chrom) + { + _results.push(make_pair(_curr_qy, _no_hits)); + _cache.clear(); + } + _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); + _curr_chrom = _curr_qy.chrom; + return true; + } +} + +bool ChromSweep::IsValidHit(const BED &query, const BED &db) { + // do we have an overlap in the DB? + if (overlaps(query.start, query.end, db.start, db.end) > 0) { + // Now test for necessary strandedness. + bool strands_are_same = (query.strand == db.strand); + if ( (_sameStrand == false && _diffStrand == false) + || + (_sameStrand == true && strands_are_same == true) + || + (_diffStrand == true && strands_are_same == false) + ) + { + return true; + } + } + return false; +} + + +bool ChromSweep::Next(pair<BED, vector<BED> > &next) { + if (!_bedA->Empty()) { + // have we changed chromosomes? + if (ChromChange() == false) { + // scan the database cache for hits + ScanCache(); + // advance the db until we are ahead of the query. update hits and cache as necessary + while (!_bedB->Empty() && _curr_qy.chrom == _curr_db.chrom && !(after(_curr_db, _curr_qy))) + { + if (IsValidHit(_curr_qy, _curr_db)) { + _hits.push_back(_curr_db); + } + _cache.push_back(_curr_db); + _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); + } + // add the hits for this query to the pump + _results.push(make_pair(_curr_qy, _hits)); + // reset for the next query + _hits.clear(); + _curr_qy = _nullBed; + _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); + _curr_chrom = _curr_qy.chrom; + } + } + // report the next set if hits if there are still overlaps in the pump + if (!_results.empty()) { + next = _results.front(); + _results.pop(); + return true; + } + // otherwise, the party is over. + else {return false;} +} +