0
|
1 /*****************************************************************************
|
|
2 chromsweepBed.h
|
|
3
|
|
4 (c) 2009 - Aaron Quinlan
|
|
5 Hall Laboratory
|
|
6 Department of Biochemistry and Molecular Genetics
|
|
7 University of Virginia
|
|
8 aaronquinlan@gmail.com
|
|
9
|
|
10 Licenced under the GNU General Public License 2.0 license.
|
|
11 ******************************************************************************/
|
|
12 #ifndef CHROMSWEEP_H
|
|
13 #define CHROMSWEEP_H
|
|
14
|
|
15 #include "bedFile.h"
|
|
16 #include <vector>
|
|
17 #include <queue>
|
|
18 #include <iostream>
|
|
19 #include <fstream>
|
|
20 #include <stdlib.h>
|
|
21 using namespace std;
|
|
22
|
|
23
|
|
24
|
|
25 class ChromSweep {
|
|
26
|
|
27 // public interface.
|
|
28 public:
|
|
29
|
|
30 // A is the query and B is the database
|
|
31
|
|
32 // constructor using existing BedFile pointers
|
|
33 ChromSweep(BedFile *bedA, BedFile *bedB, bool sameStrand = false, bool diffStrand = false);
|
|
34
|
|
35 // constructor using filenames
|
|
36 ChromSweep(string &bedAFile, string &bedBFile);
|
|
37
|
|
38 // destructor
|
|
39 ~ChromSweep(void);
|
|
40
|
|
41 // loads next (a pair) with the current query and it's overlaps
|
|
42 // next.first is the current query interval
|
|
43 // next.second is a vector of the current query's hits.
|
|
44 // returns true if overlap
|
|
45 bool Next(pair<BED, vector<BED> > &next);
|
|
46
|
|
47 // Usage:
|
|
48 // ChromSweep sweep = ChromSweep(_bedA, _bedB);
|
|
49 // pair<BED, vector<BED> > hit_set;
|
|
50 // while (sweep.Next(hit_set))
|
|
51 // {
|
|
52 // // magic happens here!
|
|
53 // processHits(hit_set.first, hit_set.second);
|
|
54 // }
|
|
55
|
|
56 // private variables.
|
|
57 private:
|
|
58
|
|
59 // instances of a bed file class.
|
|
60 BedFile *_bedA, *_bedB;
|
|
61 // do we care about strandedness.
|
|
62 bool _sameStrand, _diffStrand;
|
|
63 // a cache of still active features from the database file
|
|
64 vector<BED> _cache;
|
|
65 // the set of hits in the database for the current query
|
|
66 vector<BED> _hits;
|
|
67 // a queue from which we retrieve overlap results. used by Next()
|
|
68 queue< pair<BED, vector<BED> > > _results;
|
|
69 BED _nullBed;
|
|
70 // an empty BED vector for returning no hits for a given query
|
|
71 vector<BED> _no_hits;
|
|
72 // the current query and db features.
|
|
73 BED _curr_qy, _curr_db;
|
|
74 // a cache of the current chrom from the query. used to handle chrom changes.
|
|
75 string _curr_chrom;
|
|
76 // the current line status in the database and query files
|
|
77 BedLineStatus _qy_status, _db_status;
|
|
78 // the current line numbers in the database and query files
|
|
79 int _qy_lineNum, _db_lineNum;
|
|
80
|
|
81 // private methods.
|
|
82 private:
|
|
83
|
|
84 void ScanCache();
|
|
85 bool ChromChange();
|
|
86 bool IsValidHit(const BED &query, const BED &db);
|
|
87 };
|
|
88
|
|
89 #endif /* CHROMSWEEP_H */
|