Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp @ 1:bec36315bd12 default tip
Deleted selected files
| author | aaronquinlan |
|---|---|
| date | Sat, 19 Nov 2011 14:17:03 -0500 |
| parents | dfcd8b6c1bda |
| children |
comparison
equal
deleted
inserted
replaced
| 0:dfcd8b6c1bda | 1:bec36315bd12 |
|---|---|
| 1 /***************************************************************************** | |
| 2 chromsweep.cpp | |
| 3 | |
| 4 (c) 2009 - Aaron Quinlan | |
| 5 Hall Laboratory | |
| 6 Department of Biochemistry and Molecular Genetics | |
| 7 University of Virginia | |
| 8 aaronquinlan@gmail.com | |
| 9 | |
| 10 Licenced under the GNU General Public License 2.0 license. | |
| 11 ******************************************************************************/ | |
| 12 #include "lineFileUtilities.h" | |
| 13 #include "chromsweep.h" | |
| 14 #include <queue> | |
| 15 | |
| 16 bool after(const BED &a, const BED &b); | |
| 17 void report_hits(const BED &curr_qy, const vector<BED> &hits); | |
| 18 vector<BED> scan_cache(const BED &curr_qy, BedLineStatus qy_status, const vector<BED> &db_cache, vector<BED> &hits); | |
| 19 | |
| 20 | |
| 21 /* | |
| 22 // constructor using existing BedFile pointers | |
| 23 */ | |
| 24 ChromSweep::ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand, bool diffStrand) | |
| 25 : _bedA(bedA) | |
| 26 , _bedB(bedB) | |
| 27 , _sameStrand(sameStrand) | |
| 28 , _diffStrand(diffStrand) | |
| 29 { | |
| 30 // prime the results pump. | |
| 31 _qy_lineNum = 0; | |
| 32 _db_lineNum = 0; | |
| 33 | |
| 34 _hits.reserve(1000); | |
| 35 _cache.reserve(1000); | |
| 36 | |
| 37 _bedA->Open(); | |
| 38 _bedB->Open(); | |
| 39 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); | |
| 40 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); | |
| 41 } | |
| 42 | |
| 43 /* | |
| 44 Constructor with filenames | |
| 45 */ | |
| 46 ChromSweep::ChromSweep(string &bedAFile, string &bedBFile) | |
| 47 { | |
| 48 // prime the results pump. | |
| 49 _qy_lineNum = 0; | |
| 50 _db_lineNum = 0; | |
| 51 | |
| 52 _hits.reserve(100000); | |
| 53 _cache.reserve(100000); | |
| 54 | |
| 55 _bedA = new BedFile(bedAFile); | |
| 56 _bedB = new BedFile(bedBFile); | |
| 57 | |
| 58 _bedA->Open(); | |
| 59 _bedB->Open(); | |
| 60 | |
| 61 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); | |
| 62 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); | |
| 63 } | |
| 64 | |
| 65 | |
| 66 /* | |
| 67 Destructor | |
| 68 */ | |
| 69 ChromSweep::~ChromSweep(void) { | |
| 70 } | |
| 71 | |
| 72 | |
| 73 void ChromSweep::ScanCache() { | |
| 74 if (_qy_status != BED_INVALID) { | |
| 75 vector<BED>::iterator c = _cache.begin(); | |
| 76 while (c != _cache.end()) | |
| 77 { | |
| 78 if ((_curr_qy.chrom == c->chrom) && !(after(_curr_qy, *c))) { | |
| 79 if (IsValidHit(_curr_qy, *c)) { | |
| 80 _hits.push_back(*c); | |
| 81 } | |
| 82 ++c; | |
| 83 } | |
| 84 else { | |
| 85 c = _cache.erase(c); | |
| 86 } | |
| 87 } | |
| 88 } | |
| 89 } | |
| 90 | |
| 91 | |
| 92 bool ChromSweep::ChromChange() | |
| 93 { | |
| 94 // the files are on the same chrom | |
| 95 if ((_curr_qy.chrom == _curr_db.chrom) || (_db_status == BED_INVALID) || (_qy_status == BED_INVALID)) { | |
| 96 return false; | |
| 97 } | |
| 98 // the query is ahead of the database. fast-forward the database to catch-up. | |
| 99 else if (_curr_qy.chrom > _curr_db.chrom) { | |
| 100 while (!_bedB->Empty() && _curr_db.chrom < _curr_qy.chrom) | |
| 101 { | |
| 102 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); | |
| 103 } | |
| 104 _cache.clear(); | |
| 105 return false; | |
| 106 } | |
| 107 // the database is ahead of the query. | |
| 108 else { | |
| 109 // 1. scan the cache for remaining hits on the query's current chrom. | |
| 110 if (_curr_qy.chrom == _curr_chrom) | |
| 111 { | |
| 112 ScanCache(); | |
| 113 _results.push(make_pair(_curr_qy, _hits)); | |
| 114 _hits.clear(); | |
| 115 } | |
| 116 // 2. fast-forward until we catch up and report 0 hits until we do. | |
| 117 else if (_curr_qy.chrom < _curr_db.chrom) | |
| 118 { | |
| 119 _results.push(make_pair(_curr_qy, _no_hits)); | |
| 120 _cache.clear(); | |
| 121 } | |
| 122 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); | |
| 123 _curr_chrom = _curr_qy.chrom; | |
| 124 return true; | |
| 125 } | |
| 126 } | |
| 127 | |
| 128 bool ChromSweep::IsValidHit(const BED &query, const BED &db) { | |
| 129 // do we have an overlap in the DB? | |
| 130 if (overlaps(query.start, query.end, db.start, db.end) > 0) { | |
| 131 // Now test for necessary strandedness. | |
| 132 bool strands_are_same = (query.strand == db.strand); | |
| 133 if ( (_sameStrand == false && _diffStrand == false) | |
| 134 || | |
| 135 (_sameStrand == true && strands_are_same == true) | |
| 136 || | |
| 137 (_diffStrand == true && strands_are_same == false) | |
| 138 ) | |
| 139 { | |
| 140 return true; | |
| 141 } | |
| 142 } | |
| 143 return false; | |
| 144 } | |
| 145 | |
| 146 | |
| 147 bool ChromSweep::Next(pair<BED, vector<BED> > &next) { | |
| 148 if (!_bedA->Empty()) { | |
| 149 // have we changed chromosomes? | |
| 150 if (ChromChange() == false) { | |
| 151 // scan the database cache for hits | |
| 152 ScanCache(); | |
| 153 // advance the db until we are ahead of the query. update hits and cache as necessary | |
| 154 while (!_bedB->Empty() && _curr_qy.chrom == _curr_db.chrom && !(after(_curr_db, _curr_qy))) | |
| 155 { | |
| 156 if (IsValidHit(_curr_qy, _curr_db)) { | |
| 157 _hits.push_back(_curr_db); | |
| 158 } | |
| 159 _cache.push_back(_curr_db); | |
| 160 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); | |
| 161 } | |
| 162 // add the hits for this query to the pump | |
| 163 _results.push(make_pair(_curr_qy, _hits)); | |
| 164 // reset for the next query | |
| 165 _hits.clear(); | |
| 166 _curr_qy = _nullBed; | |
| 167 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); | |
| 168 _curr_chrom = _curr_qy.chrom; | |
| 169 } | |
| 170 } | |
| 171 // report the next set if hits if there are still overlaps in the pump | |
| 172 if (!_results.empty()) { | |
| 173 next = _results.front(); | |
| 174 _results.pop(); | |
| 175 return true; | |
| 176 } | |
| 177 // otherwise, the party is over. | |
| 178 else {return false;} | |
| 179 } | |
| 180 |
