0
|
1 //
|
|
2 // bedFilePE.cpp
|
|
3 // BEDTools
|
|
4 //
|
|
5 // Created by Aaron Quinlan Spring 2009.
|
|
6 // Copyright 2009 Aaron Quinlan. All rights reserved.
|
|
7 //
|
|
8 // Summary: Contains common functions for finding BED overlaps.
|
|
9 //
|
|
10 // Acknowledgments: Much of the code herein is taken from Jim Kent's
|
|
11 // BED processing code. I am grateful for his elegant
|
|
12 // genome binning algorithm and therefore use it extensively.
|
|
13
|
|
14
|
|
15 #include "bedFilePE.h"
|
|
16
|
|
17
|
|
18 // Constructor
|
|
19 BedFilePE::BedFilePE(string &bedFile) {
|
|
20 this->bedFile = bedFile;
|
|
21 }
|
|
22
|
|
23 // Destructor
|
|
24 BedFilePE::~BedFilePE(void) {
|
|
25 }
|
|
26
|
|
27 void BedFilePE::Open(void) {
|
|
28 if (bedFile == "stdin" || bedFile == "-") {
|
|
29 _bedStream = &cin;
|
|
30 }
|
|
31 else {
|
|
32 _bedStream = new ifstream(bedFile.c_str(), ios::in);
|
|
33
|
|
34 if (isGzipFile(_bedStream) == true) {
|
|
35 delete _bedStream;
|
|
36 _bedStream = new igzstream(bedFile.c_str(), ios::in);
|
|
37 }
|
|
38 // can we open the file?
|
|
39 if ( !(_bedStream->good()) ) {
|
|
40 cerr << "Error: The requested bed file (" << bedFile << ") could not be opened. Exiting!" << endl;
|
|
41 exit (1);
|
|
42 }
|
|
43 }
|
|
44 }
|
|
45
|
|
46
|
|
47
|
|
48 // Close the BEDPE file
|
|
49 void BedFilePE::Close(void) {
|
|
50 if (bedFile != "stdin" && bedFile != "-") delete _bedStream;
|
|
51 }
|
|
52
|
|
53
|
|
54 BedLineStatus BedFilePE::GetNextBedPE (BEDPE &bedpe, int &lineNum) {
|
|
55
|
|
56 // make sure there are still lines to process.
|
|
57 // if so, tokenize, validate and return the BEDPE entry.
|
|
58 if (_bedStream->good()) {
|
|
59 string bedPELine;
|
|
60 vector<string> bedPEFields;
|
|
61 bedPEFields.reserve(10);
|
|
62
|
|
63 // parse the bedStream pointer
|
|
64 getline(*_bedStream, bedPELine);
|
|
65 lineNum++;
|
|
66
|
|
67 // split into a string vector.
|
|
68 Tokenize(bedPELine,bedPEFields);
|
|
69
|
|
70 // load the BEDPE struct as long as it's a valid BEDPE entry.
|
|
71 return parseLine(bedpe, bedPEFields, lineNum);
|
|
72 }
|
|
73 // default if file is closed or EOF
|
|
74 return BED_INVALID;
|
|
75 }
|
|
76
|
|
77
|
|
78 /*
|
|
79 reportBedPETab
|
|
80
|
|
81 Writes the _original_ BED entry for A.
|
|
82 Works for BEDPE only.
|
|
83 */
|
|
84 void BedFilePE::reportBedPETab(const BEDPE &a) {
|
|
85
|
|
86 if (this->bedType == 6) {
|
|
87 printf("%s\t%d\t%d\t%s\t%d\t%d\t", a.chrom1.c_str(), a.start1, a.end1,
|
|
88 a.chrom2.c_str(), a.start2, a.end2);
|
|
89 }
|
|
90 else if (this->bedType == 7) {
|
|
91 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t", a.chrom1.c_str(), a.start1, a.end1,
|
|
92 a.chrom2.c_str(), a.start2, a.end2,
|
|
93 a.name.c_str());
|
|
94 }
|
|
95 else if (this->bedType == 8) {
|
|
96 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t", a.chrom1.c_str(), a.start1, a.end1,
|
|
97 a.chrom2.c_str(), a.start2, a.end2,
|
|
98 a.name.c_str(), a.score.c_str());
|
|
99 }
|
|
100 else if (this->bedType == 10) {
|
|
101 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", a.chrom1.c_str(), a.start1, a.end1,
|
|
102 a.chrom2.c_str(), a.start2, a.end2,
|
|
103 a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str());
|
|
104 }
|
|
105 else if (this->bedType > 10) {
|
|
106 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", a.chrom1.c_str(), a.start1, a.end1,
|
|
107 a.chrom2.c_str(), a.start2, a.end2,
|
|
108 a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str());
|
|
109
|
|
110 vector<string>::const_iterator othIt = a.otherFields.begin();
|
|
111 vector<string>::const_iterator othEnd = a.otherFields.end();
|
|
112 for ( ; othIt != othEnd; ++othIt) {
|
|
113 printf("\t%s", othIt->c_str());
|
|
114 }
|
|
115 printf("\t");
|
|
116 }
|
|
117 }
|
|
118
|
|
119
|
|
120
|
|
121 /*
|
|
122 reportBedPENewLine
|
|
123
|
|
124 Writes the _original_ BED entry for A.
|
|
125 Works for BEDPE only.
|
|
126 */
|
|
127 void BedFilePE::reportBedPENewLine(const BEDPE &a) {
|
|
128
|
|
129 if (this->bedType == 6) {
|
|
130 printf("%s\t%d\t%d\t%s\t%d\t%d\n", a.chrom1.c_str(), a.start1, a.end1,
|
|
131 a.chrom2.c_str(), a.start2, a.end2);
|
|
132 }
|
|
133 else if (this->bedType == 7) {
|
|
134 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\n", a.chrom1.c_str(), a.start1, a.end1,
|
|
135 a.chrom2.c_str(), a.start2, a.end2,
|
|
136 a.name.c_str());
|
|
137 }
|
|
138 else if (this->bedType == 8) {
|
|
139 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\n", a.chrom1.c_str(), a.start1, a.end1,
|
|
140 a.chrom2.c_str(), a.start2, a.end2,
|
|
141 a.name.c_str(), a.score.c_str());
|
|
142 }
|
|
143 else if (this->bedType == 10) {
|
|
144 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", a.chrom1.c_str(), a.start1, a.end1,
|
|
145 a.chrom2.c_str(), a.start2, a.end2,
|
|
146 a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str());
|
|
147 }
|
|
148 else if (this->bedType > 10) {
|
|
149 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", a.chrom1.c_str(), a.start1, a.end1,
|
|
150 a.chrom2.c_str(), a.start2, a.end2,
|
|
151 a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str());
|
|
152
|
|
153 vector<string>::const_iterator othIt = a.otherFields.begin();
|
|
154 vector<string>::const_iterator othEnd = a.otherFields.end();
|
|
155 for ( ; othIt != othEnd; ++othIt) {
|
|
156 printf("\t%s", othIt->c_str());
|
|
157 }
|
|
158 printf("\n");
|
|
159 }
|
|
160 }
|
|
161
|
|
162
|
|
163 BedLineStatus BedFilePE::parseLine (BEDPE &bedpe, const vector<string> &lineVector, int &lineNum) {
|
|
164
|
|
165 // bail out if we have a blank line
|
|
166 if (lineVector.empty())
|
|
167 return BED_BLANK;
|
|
168
|
|
169 if ((lineVector[0].find("track") == string::npos) && (lineVector[0].find("browser") == string::npos) && (lineVector[0].find("#") == string::npos) ) {
|
|
170 // we need at least 6 columns
|
|
171 if (lineVector.size() >= 6) {
|
|
172 if (parseBedPELine(bedpe, lineVector, lineNum) == true)
|
|
173 return BED_VALID;
|
|
174 else return BED_INVALID;
|
|
175 }
|
|
176 else {
|
|
177 cerr << "It looks as though you have less than 6 columns. Are you sure your files are tab-delimited?" << endl;
|
|
178 exit(1);
|
|
179 }
|
|
180 }
|
|
181 else {
|
|
182 lineNum--;
|
|
183 return BED_HEADER;
|
|
184 }
|
|
185
|
|
186 // default
|
|
187 return BED_INVALID;
|
|
188 }
|
|
189
|
|
190
|
|
191 bool BedFilePE::parseBedPELine (BEDPE &bed, const vector<string> &lineVector, const int &lineNum) {
|
|
192
|
|
193 if ((lineNum == 1) && (lineVector.size() >= 6)) {
|
|
194
|
|
195 this->bedType = lineVector.size();
|
|
196
|
|
197 if (this->bedType == 6) {
|
|
198 bed.chrom1 = lineVector[0];
|
|
199 bed.start1 = atoi(lineVector[1].c_str());
|
|
200 bed.end1 = atoi(lineVector[2].c_str());
|
|
201
|
|
202 bed.chrom2 = lineVector[3];
|
|
203 bed.start2 = atoi(lineVector[4].c_str());
|
|
204 bed.end2 = atoi(lineVector[5].c_str());
|
|
205
|
|
206 return true;
|
|
207 }
|
|
208 else if (this->bedType == 7) {
|
|
209 bed.chrom1 = lineVector[0];
|
|
210 bed.start1 = atoi(lineVector[1].c_str());
|
|
211 bed.end1 = atoi(lineVector[2].c_str());
|
|
212
|
|
213 bed.chrom2 = lineVector[3];
|
|
214 bed.start2 = atoi(lineVector[4].c_str());
|
|
215 bed.end2 = atoi(lineVector[5].c_str());
|
|
216
|
|
217 bed.name = lineVector[6];
|
|
218 return true;
|
|
219 }
|
|
220 else if (this->bedType == 8) {
|
|
221 bed.chrom1 = lineVector[0];
|
|
222 bed.start1 = atoi(lineVector[1].c_str());
|
|
223 bed.end1 = atoi(lineVector[2].c_str());
|
|
224
|
|
225 bed.chrom2 = lineVector[3];
|
|
226 bed.start2 = atoi(lineVector[4].c_str());
|
|
227 bed.end2 = atoi(lineVector[5].c_str());
|
|
228
|
|
229 bed.name = lineVector[6];
|
|
230 bed.score = lineVector[7].c_str();
|
|
231 return true;
|
|
232 }
|
|
233 else if (this->bedType == 10) {
|
|
234 bed.chrom1 = lineVector[0];
|
|
235 bed.start1 = atoi(lineVector[1].c_str());
|
|
236 bed.end1 = atoi(lineVector[2].c_str());
|
|
237
|
|
238 bed.chrom2 = lineVector[3];
|
|
239 bed.start2 = atoi(lineVector[4].c_str());
|
|
240 bed.end2 = atoi(lineVector[5].c_str());
|
|
241
|
|
242 bed.name = lineVector[6];
|
|
243 bed.score = lineVector[7].c_str();
|
|
244
|
|
245 bed.strand1 = lineVector[8];
|
|
246 bed.strand2 = lineVector[9];
|
|
247
|
|
248 return true;
|
|
249 }
|
|
250 else if (this->bedType > 10) {
|
|
251 bed.chrom1 = lineVector[0];
|
|
252 bed.start1 = atoi(lineVector[1].c_str());
|
|
253 bed.end1 = atoi(lineVector[2].c_str());
|
|
254
|
|
255 bed.chrom2 = lineVector[3];
|
|
256 bed.start2 = atoi(lineVector[4].c_str());
|
|
257 bed.end2 = atoi(lineVector[5].c_str());
|
|
258
|
|
259 bed.name = lineVector[6];
|
|
260 bed.score = lineVector[7].c_str();
|
|
261
|
|
262 bed.strand1 = lineVector[8];
|
|
263 bed.strand2 = lineVector[9];
|
|
264
|
|
265 for (unsigned int i = 10; i < lineVector.size(); ++i) {
|
|
266 bed.otherFields.push_back(lineVector[i]);
|
|
267 }
|
|
268 return true;
|
|
269 }
|
|
270 else {
|
|
271 cerr << "Unexpected number of fields: " << lineNum << ". Verify that your files are TAB-delimited and that your BEDPE file has 6,7,8 or 10 fields. Exiting..." << endl;
|
|
272 exit(1);
|
|
273 }
|
|
274
|
|
275 if (bed.start1 > bed.end1) {
|
|
276 cerr << "Error: malformed BEDPE entry at line " << lineNum << ". Start1 was greater than End1. Ignoring it and moving on." << endl;
|
|
277 return false;
|
|
278 }
|
|
279 else if (bed.start2 > bed.end2) {
|
|
280 cerr << "Error: malformed BEDPE entry at line " << lineNum << ". Start2 was greater than End2. Ignoring it and moving on." << endl;
|
|
281 return false;
|
|
282 }
|
|
283 else if ( (bed.start1 < 0) || (bed.end1 < 0) || (bed.start2 < 0) || (bed.end2 < 0) ) {
|
|
284 cerr << "Error: malformed BEDPE entry at line " << lineNum << ". Coordinate <= 0. Ignoring it and moving on." << endl;
|
|
285 return false;
|
|
286 }
|
|
287 }
|
|
288 else if ( (lineNum > 1) && (lineVector.size() == this->bedType)) {
|
|
289
|
|
290 if (this->bedType == 6) {
|
|
291 bed.chrom1 = lineVector[0];
|
|
292 bed.start1 = atoi(lineVector[1].c_str());
|
|
293 bed.end1 = atoi(lineVector[2].c_str());
|
|
294
|
|
295 bed.chrom2 = lineVector[3];
|
|
296 bed.start2 = atoi(lineVector[4].c_str());
|
|
297 bed.end2 = atoi(lineVector[5].c_str());
|
|
298
|
|
299 return true;
|
|
300 }
|
|
301 else if (this->bedType == 7) {
|
|
302 bed.chrom1 = lineVector[0];
|
|
303 bed.start1 = atoi(lineVector[1].c_str());
|
|
304 bed.end1 = atoi(lineVector[2].c_str());
|
|
305
|
|
306 bed.chrom2 = lineVector[3];
|
|
307 bed.start2 = atoi(lineVector[4].c_str());
|
|
308 bed.end2 = atoi(lineVector[5].c_str());
|
|
309
|
|
310 bed.name = lineVector[6];
|
|
311 return true;
|
|
312 }
|
|
313 else if (this->bedType == 8) {
|
|
314 bed.chrom1 = lineVector[0];
|
|
315 bed.start1 = atoi(lineVector[1].c_str());
|
|
316 bed.end1 = atoi(lineVector[2].c_str());
|
|
317
|
|
318 bed.chrom2 = lineVector[3];
|
|
319 bed.start2 = atoi(lineVector[4].c_str());
|
|
320 bed.end2 = atoi(lineVector[5].c_str());
|
|
321
|
|
322 bed.name = lineVector[6];
|
|
323 bed.score = lineVector[7].c_str();
|
|
324 return true;
|
|
325 }
|
|
326 else if (this->bedType == 10) {
|
|
327 bed.chrom1 = lineVector[0];
|
|
328 bed.start1 = atoi(lineVector[1].c_str());
|
|
329 bed.end1 = atoi(lineVector[2].c_str());
|
|
330
|
|
331 bed.chrom2 = lineVector[3];
|
|
332 bed.start2 = atoi(lineVector[4].c_str());
|
|
333 bed.end2 = atoi(lineVector[5].c_str());
|
|
334
|
|
335 bed.name = lineVector[6];
|
|
336 bed.score = lineVector[7].c_str();
|
|
337
|
|
338 bed.strand1 = lineVector[8];
|
|
339 bed.strand2 = lineVector[9];
|
|
340
|
|
341 return true;
|
|
342 }
|
|
343 else if (this->bedType > 10) {
|
|
344 bed.chrom1 = lineVector[0];
|
|
345 bed.start1 = atoi(lineVector[1].c_str());
|
|
346 bed.end1 = atoi(lineVector[2].c_str());
|
|
347
|
|
348 bed.chrom2 = lineVector[3];
|
|
349 bed.start2 = atoi(lineVector[4].c_str());
|
|
350 bed.end2 = atoi(lineVector[5].c_str());
|
|
351
|
|
352 bed.name = lineVector[6];
|
|
353 bed.score = lineVector[7].c_str();
|
|
354
|
|
355 bed.strand1 = lineVector[8];
|
|
356 bed.strand2 = lineVector[9];
|
|
357
|
|
358 for (unsigned int i = 10; i < lineVector.size(); ++i) {
|
|
359 bed.otherFields.push_back(lineVector[i]);
|
|
360 }
|
|
361 return true;
|
|
362 }
|
|
363 else {
|
|
364 cerr << "Unexpected number of fields: " << lineNum << ". Verify that your files are TAB-delimited and that your BEDPE file has 6,7,8 or 10 fields. Exiting..." << endl;
|
|
365 exit(1);
|
|
366 }
|
|
367
|
|
368 if (bed.start1 > bed.end1) {
|
|
369 cerr << "Error: malformed BED entry at line " << lineNum << ". Start1 was greater than End1. Ignoring it and moving on." << endl;
|
|
370 return false;
|
|
371 }
|
|
372 else if (bed.start2 > bed.end2) {
|
|
373 cerr << "Error: malformed BED entry at line " << lineNum << ". Start2 was greater than End2. Ignoring it and moving on." << endl;
|
|
374 return false;
|
|
375 }
|
|
376 else if ( (bed.start1 < 0) || (bed.end1 < 0) || (bed.start2 < 0) || (bed.end2 < 0) ) {
|
|
377 cerr << "Error: malformed BED entry at line " << lineNum << ". Coordinate <= 0. Ignoring it and moving on." << endl;
|
|
378 return false;
|
|
379 }
|
|
380 }
|
|
381 else if (lineVector.size() == 1) {
|
|
382 cerr << "Only one BED field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl;
|
|
383 exit(1);
|
|
384 }
|
|
385 else if ((lineVector.size() != this->bedType) && (lineVector.size() != 0)) {
|
|
386 cerr << "Differing number of BEDPE fields encountered at line: " << lineNum << ". Exiting..." << endl;
|
|
387 exit(1);
|
|
388 }
|
|
389 else if ((lineVector.size() < 6) && (lineVector.size() != 0)) {
|
|
390 cerr << "TAB delimited BEDPE file with at least 6 fields (chrom1, start1, end1, chrom2, start2, end2) is required at line: "<< lineNum << ". Exiting..." << endl;
|
|
391 exit(1);
|
|
392 }
|
|
393 return false;
|
|
394 }
|
|
395
|
|
396
|
|
397 /*
|
|
398 Adapted from kent source "binKeeperFind"
|
|
399 */
|
|
400 void BedFilePE::FindOverlapsPerBin(int bEnd, string chrom, CHRPOS start, CHRPOS end, string name, string strand,
|
|
401 vector<MATE> &hits, float overlapFraction, bool forceStrand, bool enforceDiffNames) {
|
|
402
|
|
403 int startBin, endBin;
|
|
404 startBin = (start >> _binFirstShift);
|
|
405 endBin = ((end-1) >> _binFirstShift);
|
|
406
|
|
407 // loop through each bin "level" in the binning hierarchy
|
|
408 for (int i = 0; i < _binLevels; ++i) {
|
|
409
|
|
410 // loop through each bin at this level of the hierarchy
|
|
411 int offset = _binOffsetsExtended[i];
|
|
412 for (int j = (startBin+offset); j <= (endBin+offset); ++j) {
|
|
413
|
|
414 // loop through each feature in this chrom/bin and see if it overlaps
|
|
415 // with the feature that was passed in. if so, add the feature to
|
|
416 // the list of hits.
|
|
417 vector<MATE>::const_iterator bedItr;
|
|
418 vector<MATE>::const_iterator bedEnd;
|
|
419 if (bEnd == 1) {
|
|
420 bedItr = bedMapEnd1[chrom][j].begin();
|
|
421 bedEnd = bedMapEnd1[chrom][j].end();
|
|
422 }
|
|
423 else if (bEnd == 2) {
|
|
424 bedItr = bedMapEnd2[chrom][j].begin();
|
|
425 bedEnd = bedMapEnd2[chrom][j].end();
|
|
426 }
|
|
427 else {
|
|
428 cerr << "Unexpected end of B requested" << endl;
|
|
429 }
|
|
430 for (; bedItr != bedEnd; ++bedItr) {
|
|
431 float overlap = overlaps(bedItr->bed.start, bedItr->bed.end, start, end);
|
|
432 float size = end - start;
|
|
433
|
|
434 if ( (overlap / size) >= overlapFraction ) {
|
|
435
|
|
436 // skip the hit if not on the same strand (and we care)
|
|
437 if ((forceStrand == false) && (enforceDiffNames == false)) {
|
|
438 hits.push_back(*bedItr); // it's a hit, add it.
|
|
439 }
|
|
440 else if ((forceStrand == true) && (enforceDiffNames == false)) {
|
|
441 if (strand == bedItr->bed.strand)
|
|
442 hits.push_back(*bedItr); // it's a hit, add it.
|
|
443 }
|
|
444 else if ((forceStrand == true) && (enforceDiffNames == true)) {
|
|
445 if ((strand == bedItr->bed.strand) && (name != bedItr->bed.name))
|
|
446 hits.push_back(*bedItr); // it's a hit, add it.
|
|
447 }
|
|
448 else if ((forceStrand == false) && (enforceDiffNames == true)) {
|
|
449 if (name != bedItr->bed.name)
|
|
450 hits.push_back(*bedItr); // it's a hit, add it.
|
|
451 }
|
|
452 }
|
|
453
|
|
454 }
|
|
455 }
|
|
456 startBin >>= _binNextShift;
|
|
457 endBin >>= _binNextShift;
|
|
458 }
|
|
459 }
|
|
460
|
|
461
|
|
462 void BedFilePE::loadBedPEFileIntoMap() {
|
|
463
|
|
464 int lineNum = 0;
|
|
465 int bin1, bin2;
|
|
466 BedLineStatus bedStatus;
|
|
467 BEDPE bedpeEntry, nullBedPE;
|
|
468
|
|
469 Open();
|
|
470 bedStatus = this->GetNextBedPE(bedpeEntry, lineNum);
|
|
471 while (bedStatus != BED_INVALID) {
|
|
472
|
|
473 if (bedStatus == BED_VALID) {
|
|
474 MATE *bedEntry1 = new MATE();
|
|
475 MATE *bedEntry2 = new MATE();
|
|
476 // separate the BEDPE entry into separate
|
|
477 // BED entries
|
|
478 splitBedPEIntoBeds(bedpeEntry, lineNum, bedEntry1, bedEntry2);
|
|
479
|
|
480 // load end1 into a UCSC bin map
|
|
481 bin1 = getBin(bedEntry1->bed.start, bedEntry1->bed.end);
|
|
482 this->bedMapEnd1[bedEntry1->bed.chrom][bin1].push_back(*bedEntry1);
|
|
483
|
|
484 // load end2 into a UCSC bin map
|
|
485 bin2 = getBin(bedEntry2->bed.start, bedEntry2->bed.end);
|
|
486 this->bedMapEnd2[bedEntry2->bed.chrom][bin2].push_back(*bedEntry2);
|
|
487
|
|
488 bedpeEntry = nullBedPE;
|
|
489 }
|
|
490 bedStatus = this->GetNextBedPE(bedpeEntry, lineNum);
|
|
491 }
|
|
492 Close();
|
|
493 }
|
|
494
|
|
495
|
|
496 void BedFilePE::splitBedPEIntoBeds(const BEDPE &bedpeEntry, const int &lineNum, MATE *bedEntry1, MATE *bedEntry2) {
|
|
497
|
|
498 /*
|
|
499 Split the BEDPE entry into separate BED entries
|
|
500
|
|
501 NOTE: I am using a trick here where I store
|
|
502 the lineNum of the BEDPE from the original file
|
|
503 in the "count" column. This allows me to later
|
|
504 resolve whether the hits found on both ends of BEDPE A
|
|
505 came from the same entry in BEDPE B. Tracking by "name"
|
|
506 alone with fail when there are multiple mappings for a given
|
|
507 read-pair.
|
|
508 */
|
|
509
|
|
510 bedEntry1->bed.chrom = bedpeEntry.chrom1;
|
|
511 bedEntry1->bed.start = bedpeEntry.start1;
|
|
512 bedEntry1->bed.end = bedpeEntry.end1;
|
|
513 bedEntry1->bed.name = bedpeEntry.name;
|
|
514 bedEntry1->bed.score = bedpeEntry.score; // only store the score in end1 to save memory
|
|
515 bedEntry1->bed.strand = bedpeEntry.strand1;
|
|
516 bedEntry1->bed.otherFields = bedpeEntry.otherFields; // only store the otherFields in end1 to save memory
|
|
517 bedEntry1->lineNum = lineNum;
|
|
518 bedEntry1->mate = bedEntry2; // keep a pointer to end2
|
|
519
|
|
520 bedEntry2->bed.chrom = bedpeEntry.chrom2;
|
|
521 bedEntry2->bed.start = bedpeEntry.start2;
|
|
522 bedEntry2->bed.end = bedpeEntry.end2;
|
|
523 bedEntry2->bed.name = bedpeEntry.name;
|
|
524 bedEntry2->bed.strand = bedpeEntry.strand2;
|
|
525 bedEntry2->lineNum = lineNum;
|
|
526 bedEntry2->mate = bedEntry1; // keep a pointer to end1
|
|
527 }
|
|
528
|
|
529
|
|
530
|