annotate BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 bedFile.cpp
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2009 - Aaron Quinlan
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 Licensed under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 #include "bedFile.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15 /************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16 Helper functions
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 *************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 void splitBedIntoBlocks(const BED &bed, int lineNum, bedVector &bedBlocks) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 if (bed.otherFields.size() < 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 cerr << "Input error: Cannot split into blocks. Found interval with fewer than 12 columns on line " << lineNum << "." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25 int blockCount = atoi(bed.otherFields[3].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26 if ( blockCount <= 0 ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 cerr << "Input error: found interval having <= 0 blocks on line " << lineNum << "." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30 else if ( blockCount == 1 ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 //take a short-cut for single blocks
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 bedBlocks.push_back(bed);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35 // get the comma-delimited strings for the BED12 block starts and block ends.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36 string blockSizes(bed.otherFields[4]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37 string blockStarts(bed.otherFields[5]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39 vector<int> sizes;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40 vector<int> starts;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41 Tokenize(blockSizes, sizes, ",");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42 Tokenize(blockStarts, starts, ",");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44 if ( sizes.size() != (size_t) blockCount || starts.size() != (size_t) blockCount ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45 cerr << "Input error: found interval with block-counts not matching starts/sizes on line " << lineNum << "." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49 // add each BED block to the bedBlocks vector
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50 for (UINT i = 0; i < (UINT) blockCount; ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51 CHRPOS blockStart = bed.start + starts[i];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52 CHRPOS blockEnd = bed.start + starts[i] + sizes[i];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53 BED currBedBlock(bed.chrom, blockStart, blockEnd, bed.name, bed.score, bed.strand, bed.otherFields);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54 bedBlocks.push_back(currBedBlock);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60 /***********************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61 Sorting comparison functions
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62 ************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63 bool sortByChrom(BED const &a, BED const &b) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64 if (a.chrom < b.chrom) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65 else return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68 bool sortByStart(const BED &a, const BED &b) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69 if (a.start < b.start) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70 else return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73 bool sortBySizeAsc(const BED &a, const BED &b) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75 CHRPOS aLen = a.end - a.start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76 CHRPOS bLen = b.end - b.start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78 if (aLen < bLen) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79 else return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 bool sortBySizeDesc(const BED &a, const BED &b) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 CHRPOS aLen = a.end - a.start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85 CHRPOS bLen = b.end - b.start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 if (aLen > bLen) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88 else return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91 bool sortByScoreAsc(const BED &a, const BED &b) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92 if (a.score < b.score) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 else return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96 bool sortByScoreDesc(const BED &a, const BED &b) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97 if (a.score > b.score) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98 else return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 bool byChromThenStart(BED const &a, BED const &b) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 if (a.chrom < b.chrom) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104 else if (a.chrom > b.chrom) return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106 if (a.start < b.start) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 else if (a.start >= b.start) return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113 /*******************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 Class methods
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115 *******************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 // Constructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118 BedFile::BedFile(string &bedFile)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119 : bedFile(bedFile),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120 _isGff(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121 _isVcf(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122 _typeIsKnown(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123 _merged_start(-1),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124 _merged_end(-1),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125 _merged_chrom(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126 _prev_start(-1),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
127 _prev_chrom("")
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
128 {}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
129
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
130 // Destructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
131 BedFile::~BedFile(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
132 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
133
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
134
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
135 void BedFile::Open(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
136
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
137 _bedFields.reserve(12);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
138
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
139 if (bedFile == "stdin" || bedFile == "-") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
140 _bedStream = &cin;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
141 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
142 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
143 _bedStream = new ifstream(bedFile.c_str(), ios::in);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
144
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
145 if( isGzipFile(_bedStream) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
146 delete _bedStream;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
147 _bedStream = new igzstream(bedFile.c_str(), ios::in);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
148 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
149 if ( !(_bedStream->good()) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
150 cerr << "Error: The requested bed file (" << bedFile << ") could not be opened. Exiting!" << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
151 exit (1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
152 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
153 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
154 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
155
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
156 // Rewind the pointer back to the beginning of the file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
157 void BedFile::Rewind(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
158 _bedStream->seekg(0, ios::beg);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
159 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
160
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
161 // Jump to a specific byte in the file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
162 void BedFile::Seek(unsigned long offset) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
163 _bedStream->seekg(offset);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
164 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
165
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
166 // Jump to a specific byte in the file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
167 bool BedFile::Empty() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
168 return _bedStream->eof();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
169 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
170
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
171
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
172 // Close the BED file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
173 void BedFile::Close(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
174 if (bedFile != "stdin" && bedFile != "-") delete _bedStream;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
175 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
176
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
177
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
178 BedLineStatus BedFile::GetNextBed(BED &bed, int &lineNum, bool forceSorted) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
179
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
180 // make sure there are still lines to process.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
181 // if so, tokenize, validate and return the BED entry.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
182 _bedFields.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
183 // clear out the previous bed's data
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
184 if (_bedStream->good()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
185 // parse the bedStream pointer
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
186 getline(*_bedStream, _bedLine);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
187 lineNum++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
188
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
189 // split into a string vector.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
190 Tokenize(_bedLine, _bedFields);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
191
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
192 // load the BED struct as long as it's a valid BED entry.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
193 BedLineStatus status = parseLine(bed, _bedFields, lineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
194 if (!forceSorted) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
195 return status;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
196 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
197 else if (status == BED_VALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
198 if (bed.chrom == _prev_chrom) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
199 if ((int) bed.start >= _prev_start) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
200 _prev_chrom = bed.chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
201 _prev_start = bed.start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
202 return status;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
203 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
204 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
205 cerr << "ERROR: input file: (" << bedFile << ") is not sorted by chrom then start" << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
206 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
207 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
208 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
209 else if (bed.chrom > _prev_chrom) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
210 _prev_chrom = bed.chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
211 _prev_start = bed.start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
212 return status;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
213 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
214 else if (bed.chrom < _prev_chrom) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
215 cerr << "ERROR: input file: (" << bedFile << ") is not sorted by chrom then start" << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
216 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
217 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
218 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
219 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
220
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
221 // default if file is closed or EOF
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
222 return BED_INVALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
223 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
224
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
225
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
226 bool BedFile::GetNextMergedBed(BED &merged_bed, int &lineNum) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
227
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
228 if (_bedStream->good()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
229 BED bed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
230 BedLineStatus bedStatus;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
231 // force sorting; hence third param = true
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
232 while ((bedStatus = GetNextBed(bed, lineNum, true)) != BED_INVALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
233 if (bedStatus == BED_VALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
234 if (((int) bed.start - _merged_end > 0) ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
235 (_merged_end < 0) ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
236 (bed.chrom != _merged_chrom))
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
237 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
238 if (_merged_start >= 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
239 merged_bed.chrom = _merged_chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
240 merged_bed.start = _merged_start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
241 merged_bed.end = _merged_end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
242
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
243 _merged_chrom = bed.chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
244 _merged_start = bed.start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
245 _merged_end = bed.end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
246
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
247 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
248 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
249 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
250 _merged_start = bed.start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
251 _merged_chrom = bed.chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
252 _merged_end = bed.end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
253 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
254 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
255 else if ((int) bed.end > _merged_end)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
256 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
257 _merged_end = bed.end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
258 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
259 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
260 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
261 // handle the last merged block in the file.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
262 if (bedStatus == BED_INVALID)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
263 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
264 merged_bed.chrom = _merged_chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
265 merged_bed.start = _merged_start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
266 merged_bed.end = _merged_end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
267 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
268 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
269 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
270 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
271 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
272
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
273
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
274 void BedFile::FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
275 string strand, vector<BED> &hits, bool sameStrand, bool diffStrand) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
276
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
277 BIN startBin, endBin;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
278 startBin = (start >> _binFirstShift);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
279 endBin = ((end-1) >> _binFirstShift);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
280
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
281 // loop through each bin "level" in the binning hierarchy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
282 for (BINLEVEL i = 0; i < _binLevels; ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
283
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
284 // loop through each bin at this level of the hierarchy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
285 BIN offset = _binOffsetsExtended[i];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
286 for (BIN j = (startBin+offset); j <= (endBin+offset); ++j) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
287
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
288 // loop through each feature in this chrom/bin and see if it overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
289 // with the feature that was passed in. if so, add the feature to
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
290 // the list of hits.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
291 vector<BED>::const_iterator bedItr = bedMap[chrom][j].begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
292 vector<BED>::const_iterator bedEnd = bedMap[chrom][j].end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
293
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
294 for (; bedItr != bedEnd; ++bedItr) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
295 // do we have sufficient overlap?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
296 if (overlaps(bedItr->start, bedItr->end, start, end) > 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
297
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
298 bool strands_are_same = (strand == bedItr->strand);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
299
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
300 // test for necessary strandedness
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
301 if ( (sameStrand == false && diffStrand == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
302 ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
303 (sameStrand == true && strands_are_same == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
304 ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
305 (diffStrand == true && strands_are_same == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
306 )
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
307 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
308 hits.push_back(*bedItr);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
309 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
310 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
311 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
312 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
313 startBin >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
314 endBin >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
315 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
316 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
317
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
318
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
319 bool BedFile::FindOneOrMoreOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
320 bool sameStrand, bool diffStrand, float overlapFraction) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
321
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
322 BIN startBin, endBin;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
323 startBin = (start >> _binFirstShift);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
324 endBin = ((end-1) >> _binFirstShift);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
325
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
326 CHRPOS aLength = (end - start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
327
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
328 // loop through each bin "level" in the binning hierarchy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
329 for (BINLEVEL i = 0; i < _binLevels; ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
330
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
331 // loop through each bin at this level of the hierarchy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
332 BIN offset = _binOffsetsExtended[i];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
333 for (BIN j = (startBin+offset); j <= (endBin+offset); ++j) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
334
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
335 // loop through each feature in this chrom/bin and see if it overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
336 // with the feature that was passed in. if so, add the feature to
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
337 // the list of hits.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
338 vector<BED>::const_iterator bedItr = bedMap[chrom][j].begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
339 vector<BED>::const_iterator bedEnd = bedMap[chrom][j].end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
340 for (; bedItr != bedEnd; ++bedItr) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
341
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
342 CHRPOS s = max(start, bedItr->start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
343 CHRPOS e = min(end, bedItr->end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
344 // the number of overlapping bases b/w a and b
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
345 int overlapBases = (e - s);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
346
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
347 // do we have sufficient overlap?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
348 if ( (float) overlapBases / (float) aLength >= overlapFraction) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
349
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
350 bool strands_are_same = (strand == bedItr->strand);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
351 // test for necessary strandedness
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
352 if ( (sameStrand == false && diffStrand == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
353 ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
354 (sameStrand == true && strands_are_same == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
355 ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
356 (diffStrand == true && strands_are_same == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
357 )
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
358 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
359 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
360 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
361 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
362 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
363 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
364 startBin >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
365 endBin >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
366 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
367 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
368 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
369
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
370
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
371 bool BedFile::FindOneOrMoreReciprocalOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
372 bool sameStrand, bool diffStrand, float overlapFraction) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
373
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
374 BIN startBin, endBin;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
375 startBin = (start >> _binFirstShift);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
376 endBin = ((end-1) >> _binFirstShift);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
377
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
378 CHRPOS aLength = (end - start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
379
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
380 // loop through each bin "level" in the binning hierarchy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
381 for (BINLEVEL i = 0; i < _binLevels; ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
382
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
383 // loop through each bin at this level of the hierarchy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
384 BIN offset = _binOffsetsExtended[i];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
385 for (BIN j = (startBin+offset); j <= (endBin+offset); ++j) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
386
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
387 // loop through each feature in this chrom/bin and see if it overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
388 // with the feature that was passed in. if so, add the feature to
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
389 // the list of hits.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
390 vector<BED>::const_iterator bedItr = bedMap[chrom][j].begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
391 vector<BED>::const_iterator bedEnd = bedMap[chrom][j].end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
392 for (; bedItr != bedEnd; ++bedItr) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
393 CHRPOS s = max(start, bedItr->start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
394 CHRPOS e = min(end, bedItr->end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
395
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
396 // the number of overlapping bases b/w a and b
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
397 int overlapBases = (e - s);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
398
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
399 // do we have sufficient overlap?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
400 if ( (float) overlapBases / (float) aLength >= overlapFraction) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
401 CHRPOS bLength = (bedItr->end - bedItr->start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
402 float bOverlap = ( (float) overlapBases / (float) bLength );
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
403 bool strands_are_same = (strand == bedItr->strand);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
404
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
405 // test for sufficient reciprocal overlap and strandedness
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
406 if ( (bOverlap >= overlapFraction) &&
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
407 ((sameStrand == false && diffStrand == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
408 ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
409 (sameStrand == true && strands_are_same == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
410 ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
411 (diffStrand == true && strands_are_same == false))
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
412 )
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
413 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
414 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
415 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
416 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
417 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
418 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
419 startBin >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
420 endBin >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
421 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
422 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
423 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
424
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
425
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
426 void BedFile::countHits(const BED &a, bool sameStrand, bool diffStrand, bool countsOnly) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
427
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
428 BIN startBin, endBin;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
429 startBin = (a.start >> _binFirstShift);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
430 endBin = ((a.end-1) >> _binFirstShift);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
431
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
432 // loop through each bin "level" in the binning hierarchy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
433 for (BINLEVEL i = 0; i < _binLevels; ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
434
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
435 // loop through each bin at this level of the hierarchy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
436 BIN offset = _binOffsetsExtended[i];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
437 for (BIN j = (startBin+offset); j <= (endBin+offset); ++j) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
438
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
439 // loop through each feature in this chrom/bin and see if it overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
440 // with the feature that was passed in. if so, add the feature to
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
441 // the list of hits.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
442 vector<BEDCOV>::iterator bedItr = bedCovMap[a.chrom][j].begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
443 vector<BEDCOV>::iterator bedEnd = bedCovMap[a.chrom][j].end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
444 for (; bedItr != bedEnd; ++bedItr) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
445
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
446 bool strands_are_same = (a.strand == bedItr->strand);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
447 // skip the hit if not on the same strand (and we care)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
448 if ((sameStrand == true && strands_are_same == false) ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
449 (diffStrand == true && strands_are_same == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
450 )
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
451 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
452 continue;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
453 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
454 else if (overlaps(bedItr->start, bedItr->end, a.start, a.end) > 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
455
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
456 bedItr->count++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
457 if (countsOnly == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
458 if (a.zeroLength == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
459 bedItr->depthMap[a.start+1].starts++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
460 bedItr->depthMap[a.end].ends++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
461 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
462 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
463 // correct for the fact that we artificially expanded the zeroLength feature
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
464 bedItr->depthMap[a.start+2].starts++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
465 bedItr->depthMap[a.end-1].ends++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
466 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
467
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
468 if (a.start < bedItr->minOverlapStart) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
469 bedItr->minOverlapStart = a.start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
470 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
471 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
472 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
473 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
474 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
475 startBin >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
476 endBin >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
477 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
478 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
479
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
480
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
481 void BedFile::countSplitHits(const vector<BED> &bedBlocks, bool sameStrand, bool diffStrand, bool countsOnly) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
482
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
483 // set to track the distinct B features that had coverage.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
484 // we'll update the counts of coverage for these features by one
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
485 // at the end of this function to avoid over-counting.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
486 set< vector<BEDCOV>::iterator > validHits;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
487
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
488 vector<BED>::const_iterator blockItr = bedBlocks.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
489 vector<BED>::const_iterator blockEnd = bedBlocks.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
490 for (; blockItr != blockEnd; ++blockItr) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
491
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
492 BIN startBin, endBin;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
493 startBin = (blockItr->start >> _binFirstShift);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
494 endBin = ((blockItr->end-1) >> _binFirstShift);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
495
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
496 // loop through each bin "level" in the binning hierarchy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
497 for (BINLEVEL i = 0; i < _binLevels; ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
498
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
499 // loop through each bin at this level of the hierarchy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
500 BIN offset = _binOffsetsExtended[i];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
501 for (BIN j = (startBin+offset); j <= (endBin+offset); ++j) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
502
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
503 // loop through each feature in this chrom/bin and see if it overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
504 // with the feature that was passed in. if so, add the feature to
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
505 // the list of hits.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
506 vector<BEDCOV>::iterator bedItr = bedCovMap[blockItr->chrom][j].begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
507 vector<BEDCOV>::iterator bedEnd = bedCovMap[blockItr->chrom][j].end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
508 for (; bedItr != bedEnd; ++bedItr) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
509
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
510 bool strands_are_same = (blockItr->strand == bedItr->strand);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
511 // skip the hit if not on the same strand (and we care)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
512 if ((sameStrand == true && strands_are_same == false) ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
513 (diffStrand == true && strands_are_same == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
514 )
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
515 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
516 continue;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
517 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
518 else if (overlaps(bedItr->start, bedItr->end, blockItr->start, blockItr->end) > 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
519 if (countsOnly == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
520 if (blockItr->zeroLength == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
521 bedItr->depthMap[blockItr->start+1].starts++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
522 bedItr->depthMap[blockItr->end].ends++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
523 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
524 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
525 // correct for the fact that we artificially expanded the zeroLength feature
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
526 bedItr->depthMap[blockItr->start+2].starts++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
527 bedItr->depthMap[blockItr->end-1].ends++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
528 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
529 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
530
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
531 validHits.insert(bedItr);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
532 if (blockItr->start < bedItr->minOverlapStart)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
533 bedItr->minOverlapStart = blockItr->start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
534 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
535 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
536 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
537 startBin >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
538 endBin >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
539 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
540 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
541 // incrment the count of overlap by one for each B feature that overlapped
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
542 // the current passed hit. This is necessary to prevent over-counting for
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
543 // each "split"" of a single read.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
544 set< vector<BEDCOV>::iterator >::iterator validHitsItr = validHits.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
545 set< vector<BEDCOV>::iterator >::iterator validHitsEnd = validHits.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
546 for (; validHitsItr != validHitsEnd; ++validHitsItr)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
547 // the validHitsItr points to another itr, hence the (*itr)-> dereferencing.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
548 // ugly, but that's C++.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
549 (*validHitsItr)->count++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
550 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
551
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
552
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
553 void BedFile::countListHits(const BED &a, int index, bool sameStrand, bool diffStrand) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
554
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
555 BIN startBin, endBin;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
556 startBin = (a.start >> _binFirstShift);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
557 endBin = ((a.end-1) >> _binFirstShift);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
558
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
559 // loop through each bin "level" in the binning hierarchy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
560 for (BINLEVEL i = 0; i < _binLevels; ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
561
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
562 // loop through each bin at this level of the hierarchy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
563 BIN offset = _binOffsetsExtended[i];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
564 for (BIN j = (startBin+offset); j <= (endBin+offset); ++j) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
565
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
566 // loop through each feature in this chrom/bin and see if it overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
567 // with the feature that was passed in. if so, add the feature to
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
568 // the list of hits.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
569 vector<BEDCOVLIST>::iterator bedItr = bedCovListMap[a.chrom][j].begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
570 vector<BEDCOVLIST>::iterator bedEnd = bedCovListMap[a.chrom][j].end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
571 for (; bedItr != bedEnd; ++bedItr) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
572
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
573 bool strands_are_same = (a.strand == bedItr->strand);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
574 // skip the hit if not on the same strand (and we care)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
575 if ((sameStrand == true && strands_are_same == false) ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
576 (diffStrand == true && strands_are_same == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
577 )
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
578 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
579 continue;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
580 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
581 else if (overlaps(bedItr->start, bedItr->end, a.start, a.end) > 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
582 bedItr->counts[index]++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
583 if (a.zeroLength == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
584 bedItr->depthMapList[index][a.start+1].starts++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
585 bedItr->depthMapList[index][a.end].ends++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
586 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
587 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
588 // correct for the fact that we artificially expanded the zeroLength feature
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
589 bedItr->depthMapList[index][a.start+2].starts++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
590 bedItr->depthMapList[index][a.end-1].ends++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
591 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
592
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
593 if (a.start < bedItr->minOverlapStarts[index]) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
594 bedItr->minOverlapStarts[index] = a.start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
595 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
596 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
597 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
598 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
599 startBin >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
600 endBin >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
601 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
602 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
603
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
604 void BedFile::setZeroBased(bool zeroBased) { this->isZeroBased = zeroBased; }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
605
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
606 void BedFile::setGff (bool gff) { this->_isGff = gff; }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
607
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
608
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
609 void BedFile::setVcf (bool vcf) { this->_isVcf = vcf; }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
610
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
611
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
612 void BedFile::setFileType (FileType type) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
613 _fileType = type;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
614 _typeIsKnown = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
615 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
616
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
617
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
618 void BedFile::setBedType (int colNums) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
619 bedType = colNums;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
620 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
621
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
622
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
623 void BedFile::loadBedFileIntoMap() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
624
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
625 BED bedEntry, nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
626 int lineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
627 BedLineStatus bedStatus;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
628
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
629 Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
630 while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
631 if (bedStatus == BED_VALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
632 BIN bin = getBin(bedEntry.start, bedEntry.end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
633 bedMap[bedEntry.chrom][bin].push_back(bedEntry);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
634 bedEntry = nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
635 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
636 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
637 Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
638 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
639
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
640
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
641 void BedFile::loadBedCovFileIntoMap() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
642
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
643 BED bedEntry, nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
644 int lineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
645 BedLineStatus bedStatus;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
646
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
647 Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
648 while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
649 if (bedStatus == BED_VALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
650 BIN bin = getBin(bedEntry.start, bedEntry.end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
651
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
652 BEDCOV bedCov;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
653 bedCov.chrom = bedEntry.chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
654 bedCov.start = bedEntry.start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
655 bedCov.end = bedEntry.end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
656 bedCov.name = bedEntry.name;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
657 bedCov.score = bedEntry.score;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
658 bedCov.strand = bedEntry.strand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
659 bedCov.otherFields = bedEntry.otherFields;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
660 bedCov.zeroLength = bedEntry.zeroLength;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
661 bedCov.count = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
662 bedCov.minOverlapStart = INT_MAX;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
663
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
664 bedCovMap[bedEntry.chrom][bin].push_back(bedCov);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
665 bedEntry = nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
666 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
667 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
668 Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
669 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
670
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
671 void BedFile::loadBedCovListFileIntoMap() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
672
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
673 BED bedEntry, nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
674 int lineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
675 BedLineStatus bedStatus;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
676
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
677 Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
678 while ((bedStatus = GetNextBed(bedEntry, lineNum)) != BED_INVALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
679 if (bedStatus == BED_VALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
680 BIN bin = getBin(bedEntry.start, bedEntry.end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
681
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
682 BEDCOVLIST bedCovList;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
683 bedCovList.chrom = bedEntry.chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
684 bedCovList.start = bedEntry.start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
685 bedCovList.end = bedEntry.end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
686 bedCovList.name = bedEntry.name;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
687 bedCovList.score = bedEntry.score;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
688 bedCovList.strand = bedEntry.strand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
689 bedCovList.otherFields = bedEntry.otherFields;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
690 bedCovList.zeroLength = bedEntry.zeroLength;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
691
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
692 bedCovListMap[bedEntry.chrom][bin].push_back(bedCovList);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
693 bedEntry = nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
694 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
695 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
696 Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
697 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
698
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
699
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
700 void BedFile::loadBedFileIntoMapNoBin() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
701
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
702 BED bedEntry, nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
703 int lineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
704 BedLineStatus bedStatus;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
705
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
706 Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
707 while ((bedStatus = this->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
708 if (bedStatus == BED_VALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
709 bedMapNoBin[bedEntry.chrom].push_back(bedEntry);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
710 bedEntry = nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
711 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
712 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
713 Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
714
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
715 // sort the BED entries for each chromosome
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
716 // in ascending order of start position
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
717 for (masterBedMapNoBin::iterator m = this->bedMapNoBin.begin(); m != this->bedMapNoBin.end(); ++m) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
718 sort(m->second.begin(), m->second.end(), sortByStart);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
719 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
720 }