Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.cpp @ 1:bec36315bd12 default tip
Deleted selected files
| author | aaronquinlan |
|---|---|
| date | Sat, 19 Nov 2011 14:17:03 -0500 |
| parents | dfcd8b6c1bda |
| children |
comparison
equal
deleted
inserted
replaced
| 0:dfcd8b6c1bda | 1:bec36315bd12 |
|---|---|
| 1 // | |
| 2 // bedFilePE.cpp | |
| 3 // BEDTools | |
| 4 // | |
| 5 // Created by Aaron Quinlan Spring 2009. | |
| 6 // Copyright 2009 Aaron Quinlan. All rights reserved. | |
| 7 // | |
| 8 // Summary: Contains common functions for finding BED overlaps. | |
| 9 // | |
| 10 // Acknowledgments: Much of the code herein is taken from Jim Kent's | |
| 11 // BED processing code. I am grateful for his elegant | |
| 12 // genome binning algorithm and therefore use it extensively. | |
| 13 | |
| 14 | |
| 15 #include "bedFilePE.h" | |
| 16 | |
| 17 | |
| 18 // Constructor | |
| 19 BedFilePE::BedFilePE(string &bedFile) { | |
| 20 this->bedFile = bedFile; | |
| 21 } | |
| 22 | |
| 23 // Destructor | |
| 24 BedFilePE::~BedFilePE(void) { | |
| 25 } | |
| 26 | |
| 27 void BedFilePE::Open(void) { | |
| 28 if (bedFile == "stdin" || bedFile == "-") { | |
| 29 _bedStream = &cin; | |
| 30 } | |
| 31 else { | |
| 32 _bedStream = new ifstream(bedFile.c_str(), ios::in); | |
| 33 | |
| 34 if (isGzipFile(_bedStream) == true) { | |
| 35 delete _bedStream; | |
| 36 _bedStream = new igzstream(bedFile.c_str(), ios::in); | |
| 37 } | |
| 38 // can we open the file? | |
| 39 if ( !(_bedStream->good()) ) { | |
| 40 cerr << "Error: The requested bed file (" << bedFile << ") could not be opened. Exiting!" << endl; | |
| 41 exit (1); | |
| 42 } | |
| 43 } | |
| 44 } | |
| 45 | |
| 46 | |
| 47 | |
| 48 // Close the BEDPE file | |
| 49 void BedFilePE::Close(void) { | |
| 50 if (bedFile != "stdin" && bedFile != "-") delete _bedStream; | |
| 51 } | |
| 52 | |
| 53 | |
| 54 BedLineStatus BedFilePE::GetNextBedPE (BEDPE &bedpe, int &lineNum) { | |
| 55 | |
| 56 // make sure there are still lines to process. | |
| 57 // if so, tokenize, validate and return the BEDPE entry. | |
| 58 if (_bedStream->good()) { | |
| 59 string bedPELine; | |
| 60 vector<string> bedPEFields; | |
| 61 bedPEFields.reserve(10); | |
| 62 | |
| 63 // parse the bedStream pointer | |
| 64 getline(*_bedStream, bedPELine); | |
| 65 lineNum++; | |
| 66 | |
| 67 // split into a string vector. | |
| 68 Tokenize(bedPELine,bedPEFields); | |
| 69 | |
| 70 // load the BEDPE struct as long as it's a valid BEDPE entry. | |
| 71 return parseLine(bedpe, bedPEFields, lineNum); | |
| 72 } | |
| 73 // default if file is closed or EOF | |
| 74 return BED_INVALID; | |
| 75 } | |
| 76 | |
| 77 | |
| 78 /* | |
| 79 reportBedPETab | |
| 80 | |
| 81 Writes the _original_ BED entry for A. | |
| 82 Works for BEDPE only. | |
| 83 */ | |
| 84 void BedFilePE::reportBedPETab(const BEDPE &a) { | |
| 85 | |
| 86 if (this->bedType == 6) { | |
| 87 printf("%s\t%d\t%d\t%s\t%d\t%d\t", a.chrom1.c_str(), a.start1, a.end1, | |
| 88 a.chrom2.c_str(), a.start2, a.end2); | |
| 89 } | |
| 90 else if (this->bedType == 7) { | |
| 91 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t", a.chrom1.c_str(), a.start1, a.end1, | |
| 92 a.chrom2.c_str(), a.start2, a.end2, | |
| 93 a.name.c_str()); | |
| 94 } | |
| 95 else if (this->bedType == 8) { | |
| 96 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t", a.chrom1.c_str(), a.start1, a.end1, | |
| 97 a.chrom2.c_str(), a.start2, a.end2, | |
| 98 a.name.c_str(), a.score.c_str()); | |
| 99 } | |
| 100 else if (this->bedType == 10) { | |
| 101 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", a.chrom1.c_str(), a.start1, a.end1, | |
| 102 a.chrom2.c_str(), a.start2, a.end2, | |
| 103 a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str()); | |
| 104 } | |
| 105 else if (this->bedType > 10) { | |
| 106 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", a.chrom1.c_str(), a.start1, a.end1, | |
| 107 a.chrom2.c_str(), a.start2, a.end2, | |
| 108 a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str()); | |
| 109 | |
| 110 vector<string>::const_iterator othIt = a.otherFields.begin(); | |
| 111 vector<string>::const_iterator othEnd = a.otherFields.end(); | |
| 112 for ( ; othIt != othEnd; ++othIt) { | |
| 113 printf("\t%s", othIt->c_str()); | |
| 114 } | |
| 115 printf("\t"); | |
| 116 } | |
| 117 } | |
| 118 | |
| 119 | |
| 120 | |
| 121 /* | |
| 122 reportBedPENewLine | |
| 123 | |
| 124 Writes the _original_ BED entry for A. | |
| 125 Works for BEDPE only. | |
| 126 */ | |
| 127 void BedFilePE::reportBedPENewLine(const BEDPE &a) { | |
| 128 | |
| 129 if (this->bedType == 6) { | |
| 130 printf("%s\t%d\t%d\t%s\t%d\t%d\n", a.chrom1.c_str(), a.start1, a.end1, | |
| 131 a.chrom2.c_str(), a.start2, a.end2); | |
| 132 } | |
| 133 else if (this->bedType == 7) { | |
| 134 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\n", a.chrom1.c_str(), a.start1, a.end1, | |
| 135 a.chrom2.c_str(), a.start2, a.end2, | |
| 136 a.name.c_str()); | |
| 137 } | |
| 138 else if (this->bedType == 8) { | |
| 139 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\n", a.chrom1.c_str(), a.start1, a.end1, | |
| 140 a.chrom2.c_str(), a.start2, a.end2, | |
| 141 a.name.c_str(), a.score.c_str()); | |
| 142 } | |
| 143 else if (this->bedType == 10) { | |
| 144 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", a.chrom1.c_str(), a.start1, a.end1, | |
| 145 a.chrom2.c_str(), a.start2, a.end2, | |
| 146 a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str()); | |
| 147 } | |
| 148 else if (this->bedType > 10) { | |
| 149 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", a.chrom1.c_str(), a.start1, a.end1, | |
| 150 a.chrom2.c_str(), a.start2, a.end2, | |
| 151 a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str()); | |
| 152 | |
| 153 vector<string>::const_iterator othIt = a.otherFields.begin(); | |
| 154 vector<string>::const_iterator othEnd = a.otherFields.end(); | |
| 155 for ( ; othIt != othEnd; ++othIt) { | |
| 156 printf("\t%s", othIt->c_str()); | |
| 157 } | |
| 158 printf("\n"); | |
| 159 } | |
| 160 } | |
| 161 | |
| 162 | |
| 163 BedLineStatus BedFilePE::parseLine (BEDPE &bedpe, const vector<string> &lineVector, int &lineNum) { | |
| 164 | |
| 165 // bail out if we have a blank line | |
| 166 if (lineVector.empty()) | |
| 167 return BED_BLANK; | |
| 168 | |
| 169 if ((lineVector[0].find("track") == string::npos) && (lineVector[0].find("browser") == string::npos) && (lineVector[0].find("#") == string::npos) ) { | |
| 170 // we need at least 6 columns | |
| 171 if (lineVector.size() >= 6) { | |
| 172 if (parseBedPELine(bedpe, lineVector, lineNum) == true) | |
| 173 return BED_VALID; | |
| 174 else return BED_INVALID; | |
| 175 } | |
| 176 else { | |
| 177 cerr << "It looks as though you have less than 6 columns. Are you sure your files are tab-delimited?" << endl; | |
| 178 exit(1); | |
| 179 } | |
| 180 } | |
| 181 else { | |
| 182 lineNum--; | |
| 183 return BED_HEADER; | |
| 184 } | |
| 185 | |
| 186 // default | |
| 187 return BED_INVALID; | |
| 188 } | |
| 189 | |
| 190 | |
| 191 bool BedFilePE::parseBedPELine (BEDPE &bed, const vector<string> &lineVector, const int &lineNum) { | |
| 192 | |
| 193 if ((lineNum == 1) && (lineVector.size() >= 6)) { | |
| 194 | |
| 195 this->bedType = lineVector.size(); | |
| 196 | |
| 197 if (this->bedType == 6) { | |
| 198 bed.chrom1 = lineVector[0]; | |
| 199 bed.start1 = atoi(lineVector[1].c_str()); | |
| 200 bed.end1 = atoi(lineVector[2].c_str()); | |
| 201 | |
| 202 bed.chrom2 = lineVector[3]; | |
| 203 bed.start2 = atoi(lineVector[4].c_str()); | |
| 204 bed.end2 = atoi(lineVector[5].c_str()); | |
| 205 | |
| 206 return true; | |
| 207 } | |
| 208 else if (this->bedType == 7) { | |
| 209 bed.chrom1 = lineVector[0]; | |
| 210 bed.start1 = atoi(lineVector[1].c_str()); | |
| 211 bed.end1 = atoi(lineVector[2].c_str()); | |
| 212 | |
| 213 bed.chrom2 = lineVector[3]; | |
| 214 bed.start2 = atoi(lineVector[4].c_str()); | |
| 215 bed.end2 = atoi(lineVector[5].c_str()); | |
| 216 | |
| 217 bed.name = lineVector[6]; | |
| 218 return true; | |
| 219 } | |
| 220 else if (this->bedType == 8) { | |
| 221 bed.chrom1 = lineVector[0]; | |
| 222 bed.start1 = atoi(lineVector[1].c_str()); | |
| 223 bed.end1 = atoi(lineVector[2].c_str()); | |
| 224 | |
| 225 bed.chrom2 = lineVector[3]; | |
| 226 bed.start2 = atoi(lineVector[4].c_str()); | |
| 227 bed.end2 = atoi(lineVector[5].c_str()); | |
| 228 | |
| 229 bed.name = lineVector[6]; | |
| 230 bed.score = lineVector[7].c_str(); | |
| 231 return true; | |
| 232 } | |
| 233 else if (this->bedType == 10) { | |
| 234 bed.chrom1 = lineVector[0]; | |
| 235 bed.start1 = atoi(lineVector[1].c_str()); | |
| 236 bed.end1 = atoi(lineVector[2].c_str()); | |
| 237 | |
| 238 bed.chrom2 = lineVector[3]; | |
| 239 bed.start2 = atoi(lineVector[4].c_str()); | |
| 240 bed.end2 = atoi(lineVector[5].c_str()); | |
| 241 | |
| 242 bed.name = lineVector[6]; | |
| 243 bed.score = lineVector[7].c_str(); | |
| 244 | |
| 245 bed.strand1 = lineVector[8]; | |
| 246 bed.strand2 = lineVector[9]; | |
| 247 | |
| 248 return true; | |
| 249 } | |
| 250 else if (this->bedType > 10) { | |
| 251 bed.chrom1 = lineVector[0]; | |
| 252 bed.start1 = atoi(lineVector[1].c_str()); | |
| 253 bed.end1 = atoi(lineVector[2].c_str()); | |
| 254 | |
| 255 bed.chrom2 = lineVector[3]; | |
| 256 bed.start2 = atoi(lineVector[4].c_str()); | |
| 257 bed.end2 = atoi(lineVector[5].c_str()); | |
| 258 | |
| 259 bed.name = lineVector[6]; | |
| 260 bed.score = lineVector[7].c_str(); | |
| 261 | |
| 262 bed.strand1 = lineVector[8]; | |
| 263 bed.strand2 = lineVector[9]; | |
| 264 | |
| 265 for (unsigned int i = 10; i < lineVector.size(); ++i) { | |
| 266 bed.otherFields.push_back(lineVector[i]); | |
| 267 } | |
| 268 return true; | |
| 269 } | |
| 270 else { | |
| 271 cerr << "Unexpected number of fields: " << lineNum << ". Verify that your files are TAB-delimited and that your BEDPE file has 6,7,8 or 10 fields. Exiting..." << endl; | |
| 272 exit(1); | |
| 273 } | |
| 274 | |
| 275 if (bed.start1 > bed.end1) { | |
| 276 cerr << "Error: malformed BEDPE entry at line " << lineNum << ". Start1 was greater than End1. Ignoring it and moving on." << endl; | |
| 277 return false; | |
| 278 } | |
| 279 else if (bed.start2 > bed.end2) { | |
| 280 cerr << "Error: malformed BEDPE entry at line " << lineNum << ". Start2 was greater than End2. Ignoring it and moving on." << endl; | |
| 281 return false; | |
| 282 } | |
| 283 else if ( (bed.start1 < 0) || (bed.end1 < 0) || (bed.start2 < 0) || (bed.end2 < 0) ) { | |
| 284 cerr << "Error: malformed BEDPE entry at line " << lineNum << ". Coordinate <= 0. Ignoring it and moving on." << endl; | |
| 285 return false; | |
| 286 } | |
| 287 } | |
| 288 else if ( (lineNum > 1) && (lineVector.size() == this->bedType)) { | |
| 289 | |
| 290 if (this->bedType == 6) { | |
| 291 bed.chrom1 = lineVector[0]; | |
| 292 bed.start1 = atoi(lineVector[1].c_str()); | |
| 293 bed.end1 = atoi(lineVector[2].c_str()); | |
| 294 | |
| 295 bed.chrom2 = lineVector[3]; | |
| 296 bed.start2 = atoi(lineVector[4].c_str()); | |
| 297 bed.end2 = atoi(lineVector[5].c_str()); | |
| 298 | |
| 299 return true; | |
| 300 } | |
| 301 else if (this->bedType == 7) { | |
| 302 bed.chrom1 = lineVector[0]; | |
| 303 bed.start1 = atoi(lineVector[1].c_str()); | |
| 304 bed.end1 = atoi(lineVector[2].c_str()); | |
| 305 | |
| 306 bed.chrom2 = lineVector[3]; | |
| 307 bed.start2 = atoi(lineVector[4].c_str()); | |
| 308 bed.end2 = atoi(lineVector[5].c_str()); | |
| 309 | |
| 310 bed.name = lineVector[6]; | |
| 311 return true; | |
| 312 } | |
| 313 else if (this->bedType == 8) { | |
| 314 bed.chrom1 = lineVector[0]; | |
| 315 bed.start1 = atoi(lineVector[1].c_str()); | |
| 316 bed.end1 = atoi(lineVector[2].c_str()); | |
| 317 | |
| 318 bed.chrom2 = lineVector[3]; | |
| 319 bed.start2 = atoi(lineVector[4].c_str()); | |
| 320 bed.end2 = atoi(lineVector[5].c_str()); | |
| 321 | |
| 322 bed.name = lineVector[6]; | |
| 323 bed.score = lineVector[7].c_str(); | |
| 324 return true; | |
| 325 } | |
| 326 else if (this->bedType == 10) { | |
| 327 bed.chrom1 = lineVector[0]; | |
| 328 bed.start1 = atoi(lineVector[1].c_str()); | |
| 329 bed.end1 = atoi(lineVector[2].c_str()); | |
| 330 | |
| 331 bed.chrom2 = lineVector[3]; | |
| 332 bed.start2 = atoi(lineVector[4].c_str()); | |
| 333 bed.end2 = atoi(lineVector[5].c_str()); | |
| 334 | |
| 335 bed.name = lineVector[6]; | |
| 336 bed.score = lineVector[7].c_str(); | |
| 337 | |
| 338 bed.strand1 = lineVector[8]; | |
| 339 bed.strand2 = lineVector[9]; | |
| 340 | |
| 341 return true; | |
| 342 } | |
| 343 else if (this->bedType > 10) { | |
| 344 bed.chrom1 = lineVector[0]; | |
| 345 bed.start1 = atoi(lineVector[1].c_str()); | |
| 346 bed.end1 = atoi(lineVector[2].c_str()); | |
| 347 | |
| 348 bed.chrom2 = lineVector[3]; | |
| 349 bed.start2 = atoi(lineVector[4].c_str()); | |
| 350 bed.end2 = atoi(lineVector[5].c_str()); | |
| 351 | |
| 352 bed.name = lineVector[6]; | |
| 353 bed.score = lineVector[7].c_str(); | |
| 354 | |
| 355 bed.strand1 = lineVector[8]; | |
| 356 bed.strand2 = lineVector[9]; | |
| 357 | |
| 358 for (unsigned int i = 10; i < lineVector.size(); ++i) { | |
| 359 bed.otherFields.push_back(lineVector[i]); | |
| 360 } | |
| 361 return true; | |
| 362 } | |
| 363 else { | |
| 364 cerr << "Unexpected number of fields: " << lineNum << ". Verify that your files are TAB-delimited and that your BEDPE file has 6,7,8 or 10 fields. Exiting..." << endl; | |
| 365 exit(1); | |
| 366 } | |
| 367 | |
| 368 if (bed.start1 > bed.end1) { | |
| 369 cerr << "Error: malformed BED entry at line " << lineNum << ". Start1 was greater than End1. Ignoring it and moving on." << endl; | |
| 370 return false; | |
| 371 } | |
| 372 else if (bed.start2 > bed.end2) { | |
| 373 cerr << "Error: malformed BED entry at line " << lineNum << ". Start2 was greater than End2. Ignoring it and moving on." << endl; | |
| 374 return false; | |
| 375 } | |
| 376 else if ( (bed.start1 < 0) || (bed.end1 < 0) || (bed.start2 < 0) || (bed.end2 < 0) ) { | |
| 377 cerr << "Error: malformed BED entry at line " << lineNum << ". Coordinate <= 0. Ignoring it and moving on." << endl; | |
| 378 return false; | |
| 379 } | |
| 380 } | |
| 381 else if (lineVector.size() == 1) { | |
| 382 cerr << "Only one BED field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl; | |
| 383 exit(1); | |
| 384 } | |
| 385 else if ((lineVector.size() != this->bedType) && (lineVector.size() != 0)) { | |
| 386 cerr << "Differing number of BEDPE fields encountered at line: " << lineNum << ". Exiting..." << endl; | |
| 387 exit(1); | |
| 388 } | |
| 389 else if ((lineVector.size() < 6) && (lineVector.size() != 0)) { | |
| 390 cerr << "TAB delimited BEDPE file with at least 6 fields (chrom1, start1, end1, chrom2, start2, end2) is required at line: "<< lineNum << ". Exiting..." << endl; | |
| 391 exit(1); | |
| 392 } | |
| 393 return false; | |
| 394 } | |
| 395 | |
| 396 | |
| 397 /* | |
| 398 Adapted from kent source "binKeeperFind" | |
| 399 */ | |
| 400 void BedFilePE::FindOverlapsPerBin(int bEnd, string chrom, CHRPOS start, CHRPOS end, string name, string strand, | |
| 401 vector<MATE> &hits, float overlapFraction, bool forceStrand, bool enforceDiffNames) { | |
| 402 | |
| 403 int startBin, endBin; | |
| 404 startBin = (start >> _binFirstShift); | |
| 405 endBin = ((end-1) >> _binFirstShift); | |
| 406 | |
| 407 // loop through each bin "level" in the binning hierarchy | |
| 408 for (int i = 0; i < _binLevels; ++i) { | |
| 409 | |
| 410 // loop through each bin at this level of the hierarchy | |
| 411 int offset = _binOffsetsExtended[i]; | |
| 412 for (int j = (startBin+offset); j <= (endBin+offset); ++j) { | |
| 413 | |
| 414 // loop through each feature in this chrom/bin and see if it overlaps | |
| 415 // with the feature that was passed in. if so, add the feature to | |
| 416 // the list of hits. | |
| 417 vector<MATE>::const_iterator bedItr; | |
| 418 vector<MATE>::const_iterator bedEnd; | |
| 419 if (bEnd == 1) { | |
| 420 bedItr = bedMapEnd1[chrom][j].begin(); | |
| 421 bedEnd = bedMapEnd1[chrom][j].end(); | |
| 422 } | |
| 423 else if (bEnd == 2) { | |
| 424 bedItr = bedMapEnd2[chrom][j].begin(); | |
| 425 bedEnd = bedMapEnd2[chrom][j].end(); | |
| 426 } | |
| 427 else { | |
| 428 cerr << "Unexpected end of B requested" << endl; | |
| 429 } | |
| 430 for (; bedItr != bedEnd; ++bedItr) { | |
| 431 float overlap = overlaps(bedItr->bed.start, bedItr->bed.end, start, end); | |
| 432 float size = end - start; | |
| 433 | |
| 434 if ( (overlap / size) >= overlapFraction ) { | |
| 435 | |
| 436 // skip the hit if not on the same strand (and we care) | |
| 437 if ((forceStrand == false) && (enforceDiffNames == false)) { | |
| 438 hits.push_back(*bedItr); // it's a hit, add it. | |
| 439 } | |
| 440 else if ((forceStrand == true) && (enforceDiffNames == false)) { | |
| 441 if (strand == bedItr->bed.strand) | |
| 442 hits.push_back(*bedItr); // it's a hit, add it. | |
| 443 } | |
| 444 else if ((forceStrand == true) && (enforceDiffNames == true)) { | |
| 445 if ((strand == bedItr->bed.strand) && (name != bedItr->bed.name)) | |
| 446 hits.push_back(*bedItr); // it's a hit, add it. | |
| 447 } | |
| 448 else if ((forceStrand == false) && (enforceDiffNames == true)) { | |
| 449 if (name != bedItr->bed.name) | |
| 450 hits.push_back(*bedItr); // it's a hit, add it. | |
| 451 } | |
| 452 } | |
| 453 | |
| 454 } | |
| 455 } | |
| 456 startBin >>= _binNextShift; | |
| 457 endBin >>= _binNextShift; | |
| 458 } | |
| 459 } | |
| 460 | |
| 461 | |
| 462 void BedFilePE::loadBedPEFileIntoMap() { | |
| 463 | |
| 464 int lineNum = 0; | |
| 465 int bin1, bin2; | |
| 466 BedLineStatus bedStatus; | |
| 467 BEDPE bedpeEntry, nullBedPE; | |
| 468 | |
| 469 Open(); | |
| 470 bedStatus = this->GetNextBedPE(bedpeEntry, lineNum); | |
| 471 while (bedStatus != BED_INVALID) { | |
| 472 | |
| 473 if (bedStatus == BED_VALID) { | |
| 474 MATE *bedEntry1 = new MATE(); | |
| 475 MATE *bedEntry2 = new MATE(); | |
| 476 // separate the BEDPE entry into separate | |
| 477 // BED entries | |
| 478 splitBedPEIntoBeds(bedpeEntry, lineNum, bedEntry1, bedEntry2); | |
| 479 | |
| 480 // load end1 into a UCSC bin map | |
| 481 bin1 = getBin(bedEntry1->bed.start, bedEntry1->bed.end); | |
| 482 this->bedMapEnd1[bedEntry1->bed.chrom][bin1].push_back(*bedEntry1); | |
| 483 | |
| 484 // load end2 into a UCSC bin map | |
| 485 bin2 = getBin(bedEntry2->bed.start, bedEntry2->bed.end); | |
| 486 this->bedMapEnd2[bedEntry2->bed.chrom][bin2].push_back(*bedEntry2); | |
| 487 | |
| 488 bedpeEntry = nullBedPE; | |
| 489 } | |
| 490 bedStatus = this->GetNextBedPE(bedpeEntry, lineNum); | |
| 491 } | |
| 492 Close(); | |
| 493 } | |
| 494 | |
| 495 | |
| 496 void BedFilePE::splitBedPEIntoBeds(const BEDPE &bedpeEntry, const int &lineNum, MATE *bedEntry1, MATE *bedEntry2) { | |
| 497 | |
| 498 /* | |
| 499 Split the BEDPE entry into separate BED entries | |
| 500 | |
| 501 NOTE: I am using a trick here where I store | |
| 502 the lineNum of the BEDPE from the original file | |
| 503 in the "count" column. This allows me to later | |
| 504 resolve whether the hits found on both ends of BEDPE A | |
| 505 came from the same entry in BEDPE B. Tracking by "name" | |
| 506 alone with fail when there are multiple mappings for a given | |
| 507 read-pair. | |
| 508 */ | |
| 509 | |
| 510 bedEntry1->bed.chrom = bedpeEntry.chrom1; | |
| 511 bedEntry1->bed.start = bedpeEntry.start1; | |
| 512 bedEntry1->bed.end = bedpeEntry.end1; | |
| 513 bedEntry1->bed.name = bedpeEntry.name; | |
| 514 bedEntry1->bed.score = bedpeEntry.score; // only store the score in end1 to save memory | |
| 515 bedEntry1->bed.strand = bedpeEntry.strand1; | |
| 516 bedEntry1->bed.otherFields = bedpeEntry.otherFields; // only store the otherFields in end1 to save memory | |
| 517 bedEntry1->lineNum = lineNum; | |
| 518 bedEntry1->mate = bedEntry2; // keep a pointer to end2 | |
| 519 | |
| 520 bedEntry2->bed.chrom = bedpeEntry.chrom2; | |
| 521 bedEntry2->bed.start = bedpeEntry.start2; | |
| 522 bedEntry2->bed.end = bedpeEntry.end2; | |
| 523 bedEntry2->bed.name = bedpeEntry.name; | |
| 524 bedEntry2->bed.strand = bedpeEntry.strand2; | |
| 525 bedEntry2->lineNum = lineNum; | |
| 526 bedEntry2->mate = bedEntry1; // keep a pointer to end1 | |
| 527 } | |
| 528 | |
| 529 | |
| 530 |
