comparison BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfcd8b6c1bda
1 /*****************************************************************************
2 chromsweep.cpp
3
4 (c) 2009 - Aaron Quinlan
5 Hall Laboratory
6 Department of Biochemistry and Molecular Genetics
7 University of Virginia
8 aaronquinlan@gmail.com
9
10 Licenced under the GNU General Public License 2.0 license.
11 ******************************************************************************/
12 #include "lineFileUtilities.h"
13 #include "chromsweep.h"
14 #include <queue>
15
16 bool after(const BED &a, const BED &b);
17 void report_hits(const BED &curr_qy, const vector<BED> &hits);
18 vector<BED> scan_cache(const BED &curr_qy, BedLineStatus qy_status, const vector<BED> &db_cache, vector<BED> &hits);
19
20
21 /*
22 // constructor using existing BedFile pointers
23 */
24 ChromSweep::ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand, bool diffStrand)
25 : _bedA(bedA)
26 , _bedB(bedB)
27 , _sameStrand(sameStrand)
28 , _diffStrand(diffStrand)
29 {
30 // prime the results pump.
31 _qy_lineNum = 0;
32 _db_lineNum = 0;
33
34 _hits.reserve(1000);
35 _cache.reserve(1000);
36
37 _bedA->Open();
38 _bedB->Open();
39 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
40 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
41 }
42
43 /*
44 Constructor with filenames
45 */
46 ChromSweep::ChromSweep(string &bedAFile, string &bedBFile)
47 {
48 // prime the results pump.
49 _qy_lineNum = 0;
50 _db_lineNum = 0;
51
52 _hits.reserve(100000);
53 _cache.reserve(100000);
54
55 _bedA = new BedFile(bedAFile);
56 _bedB = new BedFile(bedBFile);
57
58 _bedA->Open();
59 _bedB->Open();
60
61 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
62 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
63 }
64
65
66 /*
67 Destructor
68 */
69 ChromSweep::~ChromSweep(void) {
70 }
71
72
73 void ChromSweep::ScanCache() {
74 if (_qy_status != BED_INVALID) {
75 vector<BED>::iterator c = _cache.begin();
76 while (c != _cache.end())
77 {
78 if ((_curr_qy.chrom == c->chrom) && !(after(_curr_qy, *c))) {
79 if (IsValidHit(_curr_qy, *c)) {
80 _hits.push_back(*c);
81 }
82 ++c;
83 }
84 else {
85 c = _cache.erase(c);
86 }
87 }
88 }
89 }
90
91
92 bool ChromSweep::ChromChange()
93 {
94 // the files are on the same chrom
95 if ((_curr_qy.chrom == _curr_db.chrom) || (_db_status == BED_INVALID) || (_qy_status == BED_INVALID)) {
96 return false;
97 }
98 // the query is ahead of the database. fast-forward the database to catch-up.
99 else if (_curr_qy.chrom > _curr_db.chrom) {
100 while (!_bedB->Empty() && _curr_db.chrom < _curr_qy.chrom)
101 {
102 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
103 }
104 _cache.clear();
105 return false;
106 }
107 // the database is ahead of the query.
108 else {
109 // 1. scan the cache for remaining hits on the query's current chrom.
110 if (_curr_qy.chrom == _curr_chrom)
111 {
112 ScanCache();
113 _results.push(make_pair(_curr_qy, _hits));
114 _hits.clear();
115 }
116 // 2. fast-forward until we catch up and report 0 hits until we do.
117 else if (_curr_qy.chrom < _curr_db.chrom)
118 {
119 _results.push(make_pair(_curr_qy, _no_hits));
120 _cache.clear();
121 }
122 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
123 _curr_chrom = _curr_qy.chrom;
124 return true;
125 }
126 }
127
128 bool ChromSweep::IsValidHit(const BED &query, const BED &db) {
129 // do we have an overlap in the DB?
130 if (overlaps(query.start, query.end, db.start, db.end) > 0) {
131 // Now test for necessary strandedness.
132 bool strands_are_same = (query.strand == db.strand);
133 if ( (_sameStrand == false && _diffStrand == false)
134 ||
135 (_sameStrand == true && strands_are_same == true)
136 ||
137 (_diffStrand == true && strands_are_same == false)
138 )
139 {
140 return true;
141 }
142 }
143 return false;
144 }
145
146
147 bool ChromSweep::Next(pair<BED, vector<BED> > &next) {
148 if (!_bedA->Empty()) {
149 // have we changed chromosomes?
150 if (ChromChange() == false) {
151 // scan the database cache for hits
152 ScanCache();
153 // advance the db until we are ahead of the query. update hits and cache as necessary
154 while (!_bedB->Empty() && _curr_qy.chrom == _curr_db.chrom && !(after(_curr_db, _curr_qy)))
155 {
156 if (IsValidHit(_curr_qy, _curr_db)) {
157 _hits.push_back(_curr_db);
158 }
159 _cache.push_back(_curr_db);
160 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
161 }
162 // add the hits for this query to the pump
163 _results.push(make_pair(_curr_qy, _hits));
164 // reset for the next query
165 _hits.clear();
166 _curr_qy = _nullBed;
167 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
168 _curr_chrom = _curr_qy.chrom;
169 }
170 }
171 // report the next set if hits if there are still overlaps in the pump
172 if (!_results.empty()) {
173 next = _results.front();
174 _results.pop();
175 return true;
176 }
177 // otherwise, the party is over.
178 else {return false;}
179 }
180