Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfcd8b6c1bda |
---|---|
1 /***************************************************************************** | |
2 chromsweep.cpp | |
3 | |
4 (c) 2009 - Aaron Quinlan | |
5 Hall Laboratory | |
6 Department of Biochemistry and Molecular Genetics | |
7 University of Virginia | |
8 aaronquinlan@gmail.com | |
9 | |
10 Licenced under the GNU General Public License 2.0 license. | |
11 ******************************************************************************/ | |
12 #include "lineFileUtilities.h" | |
13 #include "chromsweep.h" | |
14 #include <queue> | |
15 | |
16 bool after(const BED &a, const BED &b); | |
17 void report_hits(const BED &curr_qy, const vector<BED> &hits); | |
18 vector<BED> scan_cache(const BED &curr_qy, BedLineStatus qy_status, const vector<BED> &db_cache, vector<BED> &hits); | |
19 | |
20 | |
21 /* | |
22 // constructor using existing BedFile pointers | |
23 */ | |
24 ChromSweep::ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand, bool diffStrand) | |
25 : _bedA(bedA) | |
26 , _bedB(bedB) | |
27 , _sameStrand(sameStrand) | |
28 , _diffStrand(diffStrand) | |
29 { | |
30 // prime the results pump. | |
31 _qy_lineNum = 0; | |
32 _db_lineNum = 0; | |
33 | |
34 _hits.reserve(1000); | |
35 _cache.reserve(1000); | |
36 | |
37 _bedA->Open(); | |
38 _bedB->Open(); | |
39 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); | |
40 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); | |
41 } | |
42 | |
43 /* | |
44 Constructor with filenames | |
45 */ | |
46 ChromSweep::ChromSweep(string &bedAFile, string &bedBFile) | |
47 { | |
48 // prime the results pump. | |
49 _qy_lineNum = 0; | |
50 _db_lineNum = 0; | |
51 | |
52 _hits.reserve(100000); | |
53 _cache.reserve(100000); | |
54 | |
55 _bedA = new BedFile(bedAFile); | |
56 _bedB = new BedFile(bedBFile); | |
57 | |
58 _bedA->Open(); | |
59 _bedB->Open(); | |
60 | |
61 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); | |
62 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); | |
63 } | |
64 | |
65 | |
66 /* | |
67 Destructor | |
68 */ | |
69 ChromSweep::~ChromSweep(void) { | |
70 } | |
71 | |
72 | |
73 void ChromSweep::ScanCache() { | |
74 if (_qy_status != BED_INVALID) { | |
75 vector<BED>::iterator c = _cache.begin(); | |
76 while (c != _cache.end()) | |
77 { | |
78 if ((_curr_qy.chrom == c->chrom) && !(after(_curr_qy, *c))) { | |
79 if (IsValidHit(_curr_qy, *c)) { | |
80 _hits.push_back(*c); | |
81 } | |
82 ++c; | |
83 } | |
84 else { | |
85 c = _cache.erase(c); | |
86 } | |
87 } | |
88 } | |
89 } | |
90 | |
91 | |
92 bool ChromSweep::ChromChange() | |
93 { | |
94 // the files are on the same chrom | |
95 if ((_curr_qy.chrom == _curr_db.chrom) || (_db_status == BED_INVALID) || (_qy_status == BED_INVALID)) { | |
96 return false; | |
97 } | |
98 // the query is ahead of the database. fast-forward the database to catch-up. | |
99 else if (_curr_qy.chrom > _curr_db.chrom) { | |
100 while (!_bedB->Empty() && _curr_db.chrom < _curr_qy.chrom) | |
101 { | |
102 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); | |
103 } | |
104 _cache.clear(); | |
105 return false; | |
106 } | |
107 // the database is ahead of the query. | |
108 else { | |
109 // 1. scan the cache for remaining hits on the query's current chrom. | |
110 if (_curr_qy.chrom == _curr_chrom) | |
111 { | |
112 ScanCache(); | |
113 _results.push(make_pair(_curr_qy, _hits)); | |
114 _hits.clear(); | |
115 } | |
116 // 2. fast-forward until we catch up and report 0 hits until we do. | |
117 else if (_curr_qy.chrom < _curr_db.chrom) | |
118 { | |
119 _results.push(make_pair(_curr_qy, _no_hits)); | |
120 _cache.clear(); | |
121 } | |
122 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); | |
123 _curr_chrom = _curr_qy.chrom; | |
124 return true; | |
125 } | |
126 } | |
127 | |
128 bool ChromSweep::IsValidHit(const BED &query, const BED &db) { | |
129 // do we have an overlap in the DB? | |
130 if (overlaps(query.start, query.end, db.start, db.end) > 0) { | |
131 // Now test for necessary strandedness. | |
132 bool strands_are_same = (query.strand == db.strand); | |
133 if ( (_sameStrand == false && _diffStrand == false) | |
134 || | |
135 (_sameStrand == true && strands_are_same == true) | |
136 || | |
137 (_diffStrand == true && strands_are_same == false) | |
138 ) | |
139 { | |
140 return true; | |
141 } | |
142 } | |
143 return false; | |
144 } | |
145 | |
146 | |
147 bool ChromSweep::Next(pair<BED, vector<BED> > &next) { | |
148 if (!_bedA->Empty()) { | |
149 // have we changed chromosomes? | |
150 if (ChromChange() == false) { | |
151 // scan the database cache for hits | |
152 ScanCache(); | |
153 // advance the db until we are ahead of the query. update hits and cache as necessary | |
154 while (!_bedB->Empty() && _curr_qy.chrom == _curr_db.chrom && !(after(_curr_db, _curr_qy))) | |
155 { | |
156 if (IsValidHit(_curr_qy, _curr_db)) { | |
157 _hits.push_back(_curr_db); | |
158 } | |
159 _cache.push_back(_curr_db); | |
160 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum); | |
161 } | |
162 // add the hits for this query to the pump | |
163 _results.push(make_pair(_curr_qy, _hits)); | |
164 // reset for the next query | |
165 _hits.clear(); | |
166 _curr_qy = _nullBed; | |
167 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum); | |
168 _curr_chrom = _curr_qy.chrom; | |
169 } | |
170 } | |
171 // report the next set if hits if there are still overlaps in the pump | |
172 if (!_results.empty()) { | |
173 next = _results.front(); | |
174 _results.pop(); | |
175 return true; | |
176 } | |
177 // otherwise, the party is over. | |
178 else {return false;} | |
179 } | |
180 |