annotate BEDTools-Version-2.14.3/src/utils/chromsweep/chromsweep.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 chromsweep.cpp
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2009 - Aaron Quinlan
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 Licenced under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 #include "lineFileUtilities.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #include "chromsweep.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14 #include <queue>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16 bool after(const BED &a, const BED &b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 void report_hits(const BED &curr_qy, const vector<BED> &hits);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 vector<BED> scan_cache(const BED &curr_qy, BedLineStatus qy_status, const vector<BED> &db_cache, vector<BED> &hits);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22 // constructor using existing BedFile pointers
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 ChromSweep::ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand, bool diffStrand)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25 : _bedA(bedA)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26 , _bedB(bedB)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 , _sameStrand(sameStrand)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 , _diffStrand(diffStrand)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30 // prime the results pump.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 _qy_lineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 _db_lineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34 _hits.reserve(1000);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35 _cache.reserve(1000);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37 _bedA->Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38 _bedB->Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44 Constructor with filenames
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46 ChromSweep::ChromSweep(string &bedAFile, string &bedBFile)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48 // prime the results pump.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49 _qy_lineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50 _db_lineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52 _hits.reserve(100000);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53 _cache.reserve(100000);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55 _bedA = new BedFile(bedAFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56 _bedB = new BedFile(bedBFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58 _bedA->Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59 _bedB->Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67 Destructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69 ChromSweep::~ChromSweep(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73 void ChromSweep::ScanCache() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74 if (_qy_status != BED_INVALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75 vector<BED>::iterator c = _cache.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76 while (c != _cache.end())
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78 if ((_curr_qy.chrom == c->chrom) && !(after(_curr_qy, *c))) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79 if (IsValidHit(_curr_qy, *c)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80 _hits.push_back(*c);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 ++c;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85 c = _cache.erase(c);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92 bool ChromSweep::ChromChange()
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 // the files are on the same chrom
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95 if ((_curr_qy.chrom == _curr_db.chrom) || (_db_status == BED_INVALID) || (_qy_status == BED_INVALID)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98 // the query is ahead of the database. fast-forward the database to catch-up.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99 else if (_curr_qy.chrom > _curr_db.chrom) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100 while (!_bedB->Empty() && _curr_db.chrom < _curr_qy.chrom)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104 _cache.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 // the database is ahead of the query.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109 // 1. scan the cache for remaining hits on the query's current chrom.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110 if (_curr_qy.chrom == _curr_chrom)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112 ScanCache();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113 _results.push(make_pair(_curr_qy, _hits));
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 _hits.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116 // 2. fast-forward until we catch up and report 0 hits until we do.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 else if (_curr_qy.chrom < _curr_db.chrom)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119 _results.push(make_pair(_curr_qy, _no_hits));
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120 _cache.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123 _curr_chrom = _curr_qy.chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
127
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
128 bool ChromSweep::IsValidHit(const BED &query, const BED &db) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
129 // do we have an overlap in the DB?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
130 if (overlaps(query.start, query.end, db.start, db.end) > 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
131 // Now test for necessary strandedness.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
132 bool strands_are_same = (query.strand == db.strand);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
133 if ( (_sameStrand == false && _diffStrand == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
134 ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
135 (_sameStrand == true && strands_are_same == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
136 ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
137 (_diffStrand == true && strands_are_same == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
138 )
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
139 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
140 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
141 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
142 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
143 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
144 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
145
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
146
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
147 bool ChromSweep::Next(pair<BED, vector<BED> > &next) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
148 if (!_bedA->Empty()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
149 // have we changed chromosomes?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
150 if (ChromChange() == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
151 // scan the database cache for hits
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
152 ScanCache();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
153 // advance the db until we are ahead of the query. update hits and cache as necessary
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
154 while (!_bedB->Empty() && _curr_qy.chrom == _curr_db.chrom && !(after(_curr_db, _curr_qy)))
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
155 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
156 if (IsValidHit(_curr_qy, _curr_db)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
157 _hits.push_back(_curr_db);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
158 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
159 _cache.push_back(_curr_db);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
160 _db_status = _bedB->GetNextBed(_curr_db, _db_lineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
161 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
162 // add the hits for this query to the pump
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
163 _results.push(make_pair(_curr_qy, _hits));
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
164 // reset for the next query
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
165 _hits.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
166 _curr_qy = _nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
167 _qy_status = _bedA->GetNextBed(_curr_qy, _qy_lineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
168 _curr_chrom = _curr_qy.chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
169 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
170 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
171 // report the next set if hits if there are still overlaps in the pump
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
172 if (!_results.empty()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
173 next = _results.front();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
174 _results.pop();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
175 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
176 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
177 // otherwise, the party is over.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
178 else {return false;}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
179 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
180