Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h @ 0:dfcd8b6c1bda
Uploaded
| author | aaronquinlan |
|---|---|
| date | Thu, 03 Nov 2011 10:25:04 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfcd8b6c1bda |
|---|---|
| 1 /***************************************************************************** | |
| 2 bedFile.h | |
| 3 | |
| 4 (c) 2009 - Aaron Quinlan | |
| 5 Hall Laboratory | |
| 6 Department of Biochemistry and Molecular Genetics | |
| 7 University of Virginia | |
| 8 aaronquinlan@gmail.com | |
| 9 | |
| 10 Licensed under the GNU General Public License 2.0 license. | |
| 11 ******************************************************************************/ | |
| 12 #ifndef BEDFILE_H | |
| 13 #define BEDFILE_H | |
| 14 | |
| 15 // "local" includes | |
| 16 #include "gzstream.h" | |
| 17 #include "lineFileUtilities.h" | |
| 18 #include "fileType.h" | |
| 19 | |
| 20 // standard includes | |
| 21 #include <vector> | |
| 22 #include <map> | |
| 23 #include <set> | |
| 24 #include <string> | |
| 25 #include <iostream> | |
| 26 #include <fstream> | |
| 27 #include <sstream> | |
| 28 #include <cstring> | |
| 29 #include <algorithm> | |
| 30 #include <limits.h> | |
| 31 #include <stdint.h> | |
| 32 #include <cstdio> | |
| 33 //#include <tr1/unordered_map> // Experimental. | |
| 34 using namespace std; | |
| 35 | |
| 36 | |
| 37 //************************************************* | |
| 38 // Data type tydedef | |
| 39 //************************************************* | |
| 40 typedef uint32_t CHRPOS; | |
| 41 typedef uint16_t BINLEVEL; | |
| 42 typedef uint32_t BIN; | |
| 43 typedef uint16_t USHORT; | |
| 44 typedef uint32_t UINT; | |
| 45 | |
| 46 //************************************************* | |
| 47 // Genome binning constants | |
| 48 //************************************************* | |
| 49 | |
| 50 const BIN _numBins = 37450; | |
| 51 const BINLEVEL _binLevels = 7; | |
| 52 | |
| 53 // bins range in size from 16kb to 512Mb | |
| 54 // Bin 0 spans 512Mbp, # Level 1 | |
| 55 // Bins 1-8 span 64Mbp, # Level 2 | |
| 56 // Bins 9-72 span 8Mbp, # Level 3 | |
| 57 // Bins 73-584 span 1Mbp # Level 4 | |
| 58 // Bins 585-4680 span 128Kbp # Level 5 | |
| 59 // Bins 4681-37449 span 16Kbp # Level 6 | |
| 60 const BIN _binOffsetsExtended[] = {32678+4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0}; | |
| 61 //const BIN _binOffsetsExtended[] = {4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0}; | |
| 62 | |
| 63 const USHORT _binFirstShift = 14; /* How much to shift to get to finest bin. */ | |
| 64 const USHORT _binNextShift = 3; /* How much to shift to get to next larger bin. */ | |
| 65 | |
| 66 | |
| 67 //************************************************* | |
| 68 // Common data structures | |
| 69 //************************************************* | |
| 70 | |
| 71 struct DEPTH { | |
| 72 UINT starts; | |
| 73 UINT ends; | |
| 74 }; | |
| 75 | |
| 76 | |
| 77 /* | |
| 78 Structure for regular BED records | |
| 79 */ | |
| 80 struct BED { | |
| 81 | |
| 82 // Regular BED fields | |
| 83 string chrom; | |
| 84 CHRPOS start; | |
| 85 CHRPOS end; | |
| 86 string name; | |
| 87 string score; | |
| 88 string strand; | |
| 89 | |
| 90 // Add'l fields for BED12 and/or custom BED annotations | |
| 91 vector<string> otherFields; | |
| 92 | |
| 93 // experimental fields for the FJOIN approach. | |
| 94 bool zeroLength; | |
| 95 bool added; | |
| 96 bool finished; | |
| 97 // list of hits from another file. | |
| 98 vector<BED> overlaps; | |
| 99 | |
| 100 public: | |
| 101 // constructors | |
| 102 | |
| 103 // Null | |
| 104 BED() | |
| 105 : chrom(""), | |
| 106 start(0), | |
| 107 end(0), | |
| 108 name(""), | |
| 109 score(""), | |
| 110 strand(""), | |
| 111 otherFields(), | |
| 112 zeroLength(false), | |
| 113 added(false), | |
| 114 finished(false), | |
| 115 overlaps() | |
| 116 {} | |
| 117 | |
| 118 // BED3 | |
| 119 BED(string chrom, CHRPOS start, CHRPOS end) | |
| 120 : chrom(chrom), | |
| 121 start(start), | |
| 122 end(end), | |
| 123 name(""), | |
| 124 score(""), | |
| 125 strand(""), | |
| 126 otherFields(), | |
| 127 zeroLength(false), | |
| 128 added(false), | |
| 129 finished(false), | |
| 130 overlaps() | |
| 131 {} | |
| 132 | |
| 133 // BED4 | |
| 134 BED(string chrom, CHRPOS start, CHRPOS end, string strand) | |
| 135 : chrom(chrom), | |
| 136 start(start), | |
| 137 end(end), | |
| 138 name(""), | |
| 139 score(""), | |
| 140 strand(strand), | |
| 141 otherFields(), | |
| 142 zeroLength(false), | |
| 143 added(false), | |
| 144 finished(false), | |
| 145 overlaps() | |
| 146 {} | |
| 147 | |
| 148 // BED6 | |
| 149 BED(string chrom, CHRPOS start, CHRPOS end, string name, | |
| 150 string score, string strand) | |
| 151 : chrom(chrom), | |
| 152 start(start), | |
| 153 end(end), | |
| 154 name(name), | |
| 155 score(score), | |
| 156 strand(strand), | |
| 157 otherFields(), | |
| 158 zeroLength(false), | |
| 159 added(false), | |
| 160 finished(false), | |
| 161 overlaps() | |
| 162 {} | |
| 163 | |
| 164 // BEDALL | |
| 165 BED(string chrom, CHRPOS start, CHRPOS end, string name, | |
| 166 string score, string strand, vector<string> otherFields) | |
| 167 : chrom(chrom), | |
| 168 start(start), | |
| 169 end(end), | |
| 170 name(name), | |
| 171 score(score), | |
| 172 strand(strand), | |
| 173 otherFields(otherFields), | |
| 174 zeroLength(false), | |
| 175 added(false), | |
| 176 finished(false), | |
| 177 overlaps() | |
| 178 {} | |
| 179 | |
| 180 int size() { | |
| 181 return end-start; | |
| 182 } | |
| 183 | |
| 184 }; // BED | |
| 185 | |
| 186 | |
| 187 /* | |
| 188 Structure for each end of a paired BED record | |
| 189 mate points to the other end. | |
| 190 */ | |
| 191 struct MATE { | |
| 192 BED bed; | |
| 193 int lineNum; | |
| 194 MATE *mate; | |
| 195 }; | |
| 196 | |
| 197 | |
| 198 /* | |
| 199 Structure for regular BED COVERAGE records | |
| 200 */ | |
| 201 struct BEDCOV { | |
| 202 | |
| 203 string chrom; | |
| 204 | |
| 205 // Regular BED fields | |
| 206 CHRPOS start; | |
| 207 CHRPOS end; | |
| 208 string name; | |
| 209 string score; | |
| 210 string strand; | |
| 211 | |
| 212 // Add'l fields for BED12 and/or custom BED annotations | |
| 213 vector<string> otherFields; | |
| 214 | |
| 215 // flag a zero-length feature | |
| 216 bool zeroLength; | |
| 217 | |
| 218 // Additional fields specific to computing coverage | |
| 219 map<unsigned int, DEPTH> depthMap; | |
| 220 unsigned int count; | |
| 221 CHRPOS minOverlapStart; | |
| 222 | |
| 223 | |
| 224 public: | |
| 225 // constructors | |
| 226 // Null | |
| 227 BEDCOV() | |
| 228 : chrom(""), | |
| 229 start(0), | |
| 230 end(0), | |
| 231 name(""), | |
| 232 score(""), | |
| 233 strand(""), | |
| 234 otherFields(), | |
| 235 zeroLength(false), | |
| 236 depthMap(), | |
| 237 count(0), | |
| 238 minOverlapStart(0) | |
| 239 {} | |
| 240 }; | |
| 241 | |
| 242 | |
| 243 /* | |
| 244 Structure for BED COVERAGE records having lists of | |
| 245 multiple coverages | |
| 246 */ | |
| 247 struct BEDCOVLIST { | |
| 248 | |
| 249 // Regular BED fields | |
| 250 string chrom; | |
| 251 CHRPOS start; | |
| 252 CHRPOS end; | |
| 253 string name; | |
| 254 string score; | |
| 255 string strand; | |
| 256 | |
| 257 // Add'l fields for BED12 and/or custom BED annotations | |
| 258 vector<string> otherFields; | |
| 259 | |
| 260 // flag a zero-length feature | |
| 261 bool zeroLength; | |
| 262 | |
| 263 // Additional fields specific to computing coverage | |
| 264 vector< map<unsigned int, DEPTH> > depthMapList; | |
| 265 vector<unsigned int> counts; | |
| 266 vector<CHRPOS> minOverlapStarts; | |
| 267 | |
| 268 | |
| 269 public: | |
| 270 // constructors | |
| 271 // Null | |
| 272 BEDCOVLIST() | |
| 273 : chrom(""), | |
| 274 start(0), | |
| 275 end(0), | |
| 276 name(""), | |
| 277 score(""), | |
| 278 strand(""), | |
| 279 otherFields(), | |
| 280 zeroLength(false), | |
| 281 depthMapList(), | |
| 282 counts(0), | |
| 283 minOverlapStarts(0) | |
| 284 {} | |
| 285 }; | |
| 286 | |
| 287 | |
| 288 // enum to flag the state of a given line in a BED file. | |
| 289 enum BedLineStatus | |
| 290 { | |
| 291 BED_INVALID = -1, | |
| 292 BED_HEADER = 0, | |
| 293 BED_BLANK = 1, | |
| 294 BED_VALID = 2 | |
| 295 }; | |
| 296 | |
| 297 // enum to indicate the type of file we are dealing with | |
| 298 enum FileType | |
| 299 { | |
| 300 BED_FILETYPE, | |
| 301 GFF_FILETYPE, | |
| 302 VCF_FILETYPE | |
| 303 }; | |
| 304 | |
| 305 //************************************************* | |
| 306 // Data structure typedefs | |
| 307 //************************************************* | |
| 308 typedef vector<BED> bedVector; | |
| 309 typedef vector<BEDCOV> bedCovVector; | |
| 310 typedef vector<MATE> mateVector; | |
| 311 typedef vector<BEDCOVLIST> bedCovListVector; | |
| 312 | |
| 313 typedef map<BIN, bedVector, std::less<BIN> > binsToBeds; | |
| 314 typedef map<BIN, bedCovVector, std::less<BIN> > binsToBedCovs; | |
| 315 typedef map<BIN, mateVector, std::less<BIN> > binsToMates; | |
| 316 typedef map<BIN, bedCovListVector, std::less<BIN> > binsToBedCovLists; | |
| 317 | |
| 318 typedef map<string, binsToBeds, std::less<string> > masterBedMap; | |
| 319 typedef map<string, binsToBedCovs, std::less<string> > masterBedCovMap; | |
| 320 typedef map<string, binsToMates, std::less<string> > masterMateMap; | |
| 321 typedef map<string, binsToBedCovLists, std::less<string> > masterBedCovListMap; | |
| 322 typedef map<string, bedVector, std::less<string> > masterBedMapNoBin; | |
| 323 | |
| 324 | |
| 325 // EXPERIMENTAL - wait for TR1 | |
| 326 // typedef vector<BED> bedVector; | |
| 327 // typedef vector<BEDCOV> bedCovVector; | |
| 328 // | |
| 329 // typedef tr1::unordered_map<BIN, bedVector> binsToBeds; | |
| 330 // typedef tr1::unordered_map<BIN, bedCovVector> binsToBedCovs; | |
| 331 // | |
| 332 // typedef tr1::unordered_map<string, binsToBeds> masterBedMap; | |
| 333 // typedef tr1::unordered_map<string, binsToBedCovs> masterBedCovMap; | |
| 334 // typedef tr1::unordered_map<string, bedVector> masterBedMapNoBin; | |
| 335 | |
| 336 | |
| 337 | |
| 338 // return the genome "bin" for a feature with this start and end | |
| 339 inline | |
| 340 BIN getBin(CHRPOS start, CHRPOS end) { | |
| 341 --end; | |
| 342 start >>= _binFirstShift; | |
| 343 end >>= _binFirstShift; | |
| 344 | |
| 345 for (register short i = 0; i < _binLevels; ++i) { | |
| 346 if (start == end) return _binOffsetsExtended[i] + start; | |
| 347 start >>= _binNextShift; | |
| 348 end >>= _binNextShift; | |
| 349 } | |
| 350 cerr << "start " << start << ", end " << end << " out of range in findBin (max is 512M)" << endl; | |
| 351 return 0; | |
| 352 } | |
| 353 | |
| 354 /**************************************************** | |
| 355 // isInteger(s): Tests if string s is a valid integer | |
| 356 *****************************************************/ | |
| 357 inline bool isInteger(const std::string& s) { | |
| 358 int len = s.length(); | |
| 359 for (int i = 0; i < len; i++) { | |
| 360 if (!std::isdigit(s[i])) return false; | |
| 361 } | |
| 362 return true; | |
| 363 } | |
| 364 | |
| 365 | |
| 366 // return the amount of overlap between two features. Negative if none and the the | |
| 367 // number of negative bases is the distance between the two. | |
| 368 inline | |
| 369 int overlaps(CHRPOS aS, CHRPOS aE, CHRPOS bS, CHRPOS bE) { | |
| 370 return min(aE, bE) - max(aS, bS); | |
| 371 } | |
| 372 | |
| 373 // is A after (to the right of) B? | |
| 374 inline | |
| 375 bool after(const BED &a, const BED &b) { | |
| 376 return (a.start >= b.end); | |
| 377 } | |
| 378 | |
| 379 | |
| 380 // Ancillary functions | |
| 381 void splitBedIntoBlocks(const BED &bed, int lineNum, bedVector &bedBlocks); | |
| 382 | |
| 383 | |
| 384 // BED Sorting Methods | |
| 385 bool sortByChrom(const BED &a, const BED &b); | |
| 386 bool sortByStart(const BED &a, const BED &b); | |
| 387 bool sortBySizeAsc(const BED &a, const BED &b); | |
| 388 bool sortBySizeDesc(const BED &a, const BED &b); | |
| 389 bool sortByScoreAsc(const BED &a, const BED &b); | |
| 390 bool sortByScoreDesc(const BED &a, const BED &b); | |
| 391 bool byChromThenStart(BED const &a, BED const &b); | |
| 392 | |
| 393 | |
| 394 | |
| 395 //************************************************ | |
| 396 // BedFile Class methods and elements | |
| 397 //************************************************ | |
| 398 class BedFile { | |
| 399 | |
| 400 public: | |
| 401 | |
| 402 // Constructor | |
| 403 BedFile(string &); | |
| 404 | |
| 405 // Destructor | |
| 406 ~BedFile(void); | |
| 407 | |
| 408 // Open a BED file for reading (creates an istream pointer) | |
| 409 void Open(void); | |
| 410 | |
| 411 // Rewind the pointer back to the beginning of the file | |
| 412 void Rewind(void); | |
| 413 | |
| 414 // Jump to a specific byte in the file | |
| 415 void Seek(unsigned long offset); | |
| 416 | |
| 417 bool Empty(); | |
| 418 | |
| 419 // Close an opened BED file. | |
| 420 void Close(void); | |
| 421 | |
| 422 // Get the next BED entry in an opened BED file. | |
| 423 BedLineStatus GetNextBed (BED &bed, int &lineNum, bool forceSorted = false); | |
| 424 | |
| 425 // Returns the next MERGED (i.e., non-overlapping) interval in an opened BED file | |
| 426 // NOTE: assumes input file is sorted by chrom then start | |
| 427 bool GetNextMergedBed(BED &merged_bed, int &lineNum); | |
| 428 | |
| 429 // load a BED file into a map keyed by chrom, then bin. value is vector of BEDs | |
| 430 void loadBedFileIntoMap(); | |
| 431 | |
| 432 // load a BED file into a map keyed by chrom, then bin. value is vector of BEDCOVs | |
| 433 void loadBedCovFileIntoMap(); | |
| 434 | |
| 435 // load a BED file into a map keyed by chrom, then bin. value is vector of BEDCOVLISTs | |
| 436 void loadBedCovListFileIntoMap(); | |
| 437 | |
| 438 // load a BED file into a map keyed by chrom. value is vector of BEDs | |
| 439 void loadBedFileIntoMapNoBin(); | |
| 440 | |
| 441 // Given a chrom, start, end and strand for a single feature, | |
| 442 // search for all overlapping features in another BED file. | |
| 443 // Searches through each relevant genome bin on the same chromosome | |
| 444 // as the single feature. Note: Adapted from kent source "binKeeperFind" | |
| 445 void FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, vector<BED> &hits, bool sameStrand, bool diffStrand); | |
| 446 | |
| 447 // return true if at least one overlap was found. otherwise, return false. | |
| 448 bool FindOneOrMoreOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, | |
| 449 bool sameStrand, bool diffStrand, float overlapFraction = 0.0); | |
| 450 | |
| 451 // return true if at least one __reciprocal__ overlap was found. otherwise, return false. | |
| 452 bool FindOneOrMoreReciprocalOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, | |
| 453 bool sameStrand, bool diffStrand, float overlapFraction = 0.0); | |
| 454 | |
| 455 // Given a chrom, start, end and strand for a single feature, | |
| 456 // increment a the number of hits for each feature in B file | |
| 457 // that the feature overlaps | |
| 458 void countHits(const BED &a, bool sameStrand = false, bool diffStrand = false, bool countsOnly = false); | |
| 459 | |
| 460 // same as above, but has special logic that processes a set of | |
| 461 // BED "blocks" from a single entry so as to avoid over-counting | |
| 462 // each "block" of a single BAM/BED12 as distinct coverage. That is, | |
| 463 // if one read has four block, we only want to count the coverage as | |
| 464 // coming from one read, not four. | |
| 465 void countSplitHits(const vector<BED> &bedBlock, bool sameStrand = false, bool diffStrand = false, bool countsOnly = false); | |
| 466 | |
| 467 // Given a chrom, start, end and strand for a single feature, | |
| 468 // increment a the number of hits for each feature in B file | |
| 469 // that the feature overlaps | |
| 470 void countListHits(const BED &a, int index, bool sameStrand, bool diffStrand); | |
| 471 | |
| 472 // the bedfile with which this instance is associated | |
| 473 string bedFile; | |
| 474 unsigned int bedType; // 3-6, 12 for BED | |
| 475 // 9 for GFF | |
| 476 bool isZeroBased; | |
| 477 | |
| 478 // Main data structires used by BEDTools | |
| 479 masterBedCovMap bedCovMap; | |
| 480 masterBedCovListMap bedCovListMap; | |
| 481 masterBedMap bedMap; | |
| 482 masterBedMapNoBin bedMapNoBin; | |
| 483 | |
| 484 private: | |
| 485 | |
| 486 // data | |
| 487 bool _isGff; | |
| 488 bool _isVcf; | |
| 489 bool _typeIsKnown; // do we know the type? (i.e., BED, GFF, VCF) | |
| 490 FileType _fileType; // what is the file type? (BED? GFF? VCF?) | |
| 491 istream *_bedStream; | |
| 492 string _bedLine; | |
| 493 vector<string> _bedFields; | |
| 494 int _merged_start; | |
| 495 int _merged_end; | |
| 496 string _merged_chrom; | |
| 497 int _prev_start; | |
| 498 string _prev_chrom; | |
| 499 | |
| 500 void setZeroBased(bool zeroBased); | |
| 501 void setGff (bool isGff); | |
| 502 void setVcf (bool isVcf); | |
| 503 void setFileType (FileType type); | |
| 504 void setBedType (int colNums); | |
| 505 | |
| 506 /****************************************************** | |
| 507 Private definitions to circumvent linker issues with | |
| 508 templated member functions. | |
| 509 *******************************************************/ | |
| 510 | |
| 511 /* | |
| 512 parseLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.) | |
| 513 */ | |
| 514 template <typename T> | |
| 515 inline BedLineStatus parseLine (T &bed, const vector<string> &lineVector, int &lineNum) { | |
| 516 | |
| 517 //char *p2End, *p3End, *p4End, *p5End; | |
| 518 //long l2, l3, l4, l5; | |
| 519 unsigned int numFields = lineVector.size(); | |
| 520 | |
| 521 // bail out if we have a blank line | |
| 522 if (numFields == 0) { | |
| 523 return BED_BLANK; | |
| 524 } | |
| 525 | |
| 526 if ((lineVector[0].find("track") == string::npos) && (lineVector[0].find("browser") == string::npos) && (lineVector[0].find("#") == string::npos) ) { | |
| 527 | |
| 528 if (numFields >= 3) { | |
| 529 // line parsing for all lines after the first non-header line | |
| 530 if (_typeIsKnown == true) { | |
| 531 switch(_fileType) { | |
| 532 case BED_FILETYPE: | |
| 533 if (parseBedLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID; | |
| 534 case VCF_FILETYPE: | |
| 535 if (parseVcfLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID; | |
| 536 case GFF_FILETYPE: | |
| 537 if (parseGffLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID; | |
| 538 default: | |
| 539 printf("ERROR: file type encountered. Exiting\n"); | |
| 540 exit(1); | |
| 541 } | |
| 542 } | |
| 543 // line parsing for first non-header line: figure out file contents | |
| 544 else { | |
| 545 // it's BED format if columns 2 and 3 are integers | |
| 546 if (isInteger(lineVector[1]) && isInteger(lineVector[2])) { | |
| 547 setGff(false); | |
| 548 setZeroBased(true); | |
| 549 setFileType(BED_FILETYPE); | |
| 550 setBedType(numFields); // we now expect numFields columns in each line | |
| 551 if (parseBedLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID; | |
| 552 } | |
| 553 // it's VCF, assuming the second column is numeric and there are at least 8 fields. | |
| 554 else if (isInteger(lineVector[1]) && numFields >= 8) { | |
| 555 setGff(false); | |
| 556 setVcf(true); | |
| 557 setZeroBased(false); | |
| 558 setFileType(VCF_FILETYPE); | |
| 559 setBedType(numFields); // we now expect numFields columns in each line | |
| 560 if (parseVcfLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID; | |
| 561 } | |
| 562 // it's GFF, assuming columns columns 4 and 5 are numeric and we have 9 fields total. | |
| 563 else if ((numFields >= 8) && isInteger(lineVector[3]) && isInteger(lineVector[4])) { | |
| 564 setGff(true); | |
| 565 setZeroBased(false); | |
| 566 setFileType(GFF_FILETYPE); | |
| 567 setBedType(numFields); // we now expect numFields columns in each line | |
| 568 if (parseGffLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID; | |
| 569 } | |
| 570 else { | |
| 571 cerr << "Unexpected file format. Please use tab-delimited BED, GFF, or VCF. " << | |
| 572 "Perhaps you have non-integer starts or ends at line " << lineNum << "?" << endl; | |
| 573 exit(1); | |
| 574 } | |
| 575 } | |
| 576 } | |
| 577 else { | |
| 578 cerr << "It looks as though you have less than 3 columns at line: " << lineNum << ". Are you sure your files are tab-delimited?" << endl; | |
| 579 exit(1); | |
| 580 } | |
| 581 } | |
| 582 else { | |
| 583 lineNum--; | |
| 584 return BED_HEADER; | |
| 585 } | |
| 586 // default | |
| 587 return BED_INVALID; | |
| 588 } | |
| 589 | |
| 590 | |
| 591 /* | |
| 592 parseBedLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.) | |
| 593 */ | |
| 594 template <typename T> | |
| 595 inline bool parseBedLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) { | |
| 596 | |
| 597 // process as long as the number of fields in this | |
| 598 // line matches what we expect for this file. | |
| 599 if (numFields == this->bedType) { | |
| 600 bed.chrom = lineVector[0]; | |
| 601 int i; | |
| 602 i = atoi(lineVector[1].c_str()); | |
| 603 if (i<0) { | |
| 604 cerr << "Error: malformed BED entry at line " << lineNum << ". Start Coordinate detected that is < 0. Exiting." << endl; | |
| 605 exit(1); | |
| 606 } | |
| 607 bed.start = (CHRPOS)i; | |
| 608 i = atoi(lineVector[2].c_str()); | |
| 609 if (i<0) { | |
| 610 cerr << "Error: malformed BED entry at line " << lineNum << ". End Coordinate detected that is < 0. Exiting." << endl; | |
| 611 exit(1); | |
| 612 } | |
| 613 bed.end = (CHRPOS)i; | |
| 614 | |
| 615 // handle starts == end (e.g., insertions in reference genome) | |
| 616 if (bed.start == bed.end) { | |
| 617 bed.start--; | |
| 618 bed.end++; | |
| 619 bed.zeroLength = true; | |
| 620 } | |
| 621 | |
| 622 if (this->bedType == 4) { | |
| 623 bed.name = lineVector[3]; | |
| 624 } | |
| 625 else if (this->bedType == 5) { | |
| 626 bed.name = lineVector[3]; | |
| 627 bed.score = lineVector[4]; | |
| 628 } | |
| 629 else if (this->bedType == 6) { | |
| 630 bed.name = lineVector[3]; | |
| 631 bed.score = lineVector[4]; | |
| 632 bed.strand = lineVector[5]; | |
| 633 } | |
| 634 else if (this->bedType > 6) { | |
| 635 bed.name = lineVector[3]; | |
| 636 bed.score = lineVector[4]; | |
| 637 bed.strand = lineVector[5]; | |
| 638 for (unsigned int i = 6; i < lineVector.size(); ++i) { | |
| 639 bed.otherFields.push_back(lineVector[i]); | |
| 640 } | |
| 641 } | |
| 642 else if (this->bedType != 3) { | |
| 643 cerr << "Error: unexpected number of fields at line: " << lineNum | |
| 644 << ". Verify that your files are TAB-delimited. Exiting..." << endl; | |
| 645 exit(1); | |
| 646 } | |
| 647 | |
| 648 // sanity checks. | |
| 649 if (bed.start <= bed.end) { | |
| 650 return true; | |
| 651 } | |
| 652 else { | |
| 653 cerr << "Error: malformed BED entry at line " << lineNum << ". Start was greater than end. Exiting." << endl; | |
| 654 exit(1); | |
| 655 } | |
| 656 } | |
| 657 else if (numFields == 1) { | |
| 658 cerr << "Only one BED field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl; | |
| 659 exit(1); | |
| 660 } | |
| 661 else if ((numFields != this->bedType) && (numFields != 0)) { | |
| 662 cerr << "Differing number of BED fields encountered at line: " << lineNum << ". Exiting..." << endl; | |
| 663 exit(1); | |
| 664 } | |
| 665 else if ((numFields < 3) && (numFields != 0)) { | |
| 666 cerr << "TAB delimited BED file with at least 3 fields (chrom, start, end) is required at line: "<< lineNum << ". Exiting..." << endl; | |
| 667 exit(1); | |
| 668 } | |
| 669 return false; | |
| 670 } | |
| 671 | |
| 672 | |
| 673 /* | |
| 674 parseVcfLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.) | |
| 675 */ | |
| 676 template <typename T> | |
| 677 inline bool parseVcfLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) { | |
| 678 if (numFields == this->bedType) { | |
| 679 bed.chrom = lineVector[0]; | |
| 680 bed.start = atoi(lineVector[1].c_str()) - 1; // VCF is one-based | |
| 681 bed.end = bed.start + lineVector[3].size(); // VCF 4.0 stores the size of the affected REF allele. | |
| 682 bed.strand = "+"; | |
| 683 // construct the name from the ref and alt alleles. | |
| 684 // if it's an annotated variant, add the rsId as well. | |
| 685 bed.name = lineVector[3] + "/" + lineVector[4]; | |
| 686 if (lineVector[2] != ".") { | |
| 687 bed.name += "_" + lineVector[2]; | |
| 688 } | |
| 689 | |
| 690 if (this->bedType > 2) { | |
| 691 for (unsigned int i = 2; i < numFields; ++i) | |
| 692 bed.otherFields.push_back(lineVector[i]); | |
| 693 } | |
| 694 | |
| 695 if ((bed.start <= bed.end) && (bed.start >= 0) && (bed.end >= 0)) { | |
| 696 return true; | |
| 697 } | |
| 698 else if (bed.start > bed.end) { | |
| 699 cerr << "Error: malformed VCF entry at line " << lineNum << ". Start was greater than end. Exiting." << endl; | |
| 700 exit(1); | |
| 701 } | |
| 702 else if ( (bed.start < 0) || (bed.end < 0) ) { | |
| 703 cerr << "Error: malformed VCF entry at line " << lineNum << ". Coordinate detected that is < 0. Exiting." << endl; | |
| 704 exit(1); | |
| 705 } | |
| 706 } | |
| 707 else if (numFields == 1) { | |
| 708 cerr << "Only one VCF field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl; | |
| 709 exit(1); | |
| 710 } | |
| 711 else if ((numFields != this->bedType) && (numFields != 0)) { | |
| 712 cerr << "Differing number of VCF fields encountered at line: " << lineNum << ". Exiting..." << endl; | |
| 713 exit(1); | |
| 714 } | |
| 715 else if ((numFields < 2) && (numFields != 0)) { | |
| 716 cerr << "TAB delimited VCF file with at least 2 fields (chrom, pos) is required at line: "<< lineNum << ". Exiting..." << endl; | |
| 717 exit(1); | |
| 718 } | |
| 719 return false; | |
| 720 } | |
| 721 | |
| 722 | |
| 723 | |
| 724 /* | |
| 725 parseGffLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.) | |
| 726 */ | |
| 727 template <typename T> | |
| 728 inline bool parseGffLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) { | |
| 729 if (numFields == this->bedType) { | |
| 730 if (this->bedType >= 8 && _isGff) { | |
| 731 bed.chrom = lineVector[0]; | |
| 732 if (isInteger(lineVector[3])) | |
| 733 bed.start = atoi(lineVector[3].c_str()); | |
| 734 if (isInteger(lineVector[4])) | |
| 735 bed.end = atoi(lineVector[4].c_str()); | |
| 736 bed.name = lineVector[2]; | |
| 737 bed.score = lineVector[5]; | |
| 738 bed.strand = lineVector[6].c_str(); | |
| 739 bed.otherFields.push_back(lineVector[1]); // add GFF "source". unused in BED | |
| 740 bed.otherFields.push_back(lineVector[7]); // add GFF "fname". unused in BED | |
| 741 // handle the optional 9th field. | |
| 742 if (this->bedType == 9) | |
| 743 bed.otherFields.push_back(lineVector[8]); // add GFF "group". unused in BED | |
| 744 bed.start--; | |
| 745 } | |
| 746 else { | |
| 747 cerr << "Error: unexpected number of fields at line: " << lineNum << | |
| 748 ". Verify that your files are TAB-delimited and that your GFF file has 8 or 9 fields. Exiting..." << endl; | |
| 749 exit(1); | |
| 750 } | |
| 751 if (bed.start > bed.end) { | |
| 752 cerr << "Error: malformed GFF entry at line " << lineNum << ". Start was greater than end. Exiting." << endl; | |
| 753 exit(1); | |
| 754 } | |
| 755 if ( (bed.start < 0) || (bed.end < 0) ) { | |
| 756 cerr << "Error: malformed GFF entry at line " << lineNum << ". Coordinate detected that is < 1. Exiting." << endl; | |
| 757 exit(1); | |
| 758 } | |
| 759 return true; | |
| 760 } | |
| 761 else if (numFields == 1) { | |
| 762 cerr << "Only one GFF field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl; | |
| 763 exit(1); | |
| 764 } | |
| 765 else if ((numFields != this->bedType) && (numFields != 0)) { | |
| 766 cerr << "Differing number of GFF fields encountered at line: " << lineNum << ". Exiting..." << endl; | |
| 767 exit(1); | |
| 768 } | |
| 769 else if ((numFields < 8) && (numFields != 0)) { | |
| 770 cerr << "TAB delimited GFF file with 8 or 9 fields is required at line: "<< lineNum << ". Exiting..." << endl; | |
| 771 exit(1); | |
| 772 } | |
| 773 return false; | |
| 774 } | |
| 775 | |
| 776 | |
| 777 public: | |
| 778 | |
| 779 /* | |
| 780 reportBedTab | |
| 781 | |
| 782 Writes the _original_ BED entry with a TAB | |
| 783 at the end of the line. | |
| 784 Works for BED3 - BED6. | |
| 785 */ | |
| 786 template <typename T> | |
| 787 inline void reportBedTab(const T &bed) { | |
| 788 | |
| 789 // if it is azeroLength feature, we need to | |
| 790 // correct the start and end coords to what they were | |
| 791 // in the original file | |
| 792 CHRPOS start = bed.start; | |
| 793 CHRPOS end = bed.end; | |
| 794 if (bed.zeroLength) { | |
| 795 if (_isGff == false) | |
| 796 start++; | |
| 797 end--; | |
| 798 } | |
| 799 | |
| 800 // BED | |
| 801 if (_isGff == false && _isVcf == false) { | |
| 802 if (this->bedType == 3) { | |
| 803 printf ("%s\t%d\t%d\t", bed.chrom.c_str(), start, end); | |
| 804 } | |
| 805 else if (this->bedType == 4) { | |
| 806 printf ("%s\t%d\t%d\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str()); | |
| 807 } | |
| 808 else if (this->bedType == 5) { | |
| 809 printf ("%s\t%d\t%d\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(), | |
| 810 bed.score.c_str()); | |
| 811 } | |
| 812 else if (this->bedType == 6) { | |
| 813 printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(), | |
| 814 bed.score.c_str(), bed.strand.c_str()); | |
| 815 } | |
| 816 else if (this->bedType > 6) { | |
| 817 printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(), | |
| 818 bed.score.c_str(), bed.strand.c_str()); | |
| 819 | |
| 820 vector<string>::const_iterator othIt = bed.otherFields.begin(); | |
| 821 vector<string>::const_iterator othEnd = bed.otherFields.end(); | |
| 822 for ( ; othIt != othEnd; ++othIt) { | |
| 823 printf("%s\t", othIt->c_str()); | |
| 824 } | |
| 825 } | |
| 826 } | |
| 827 // VCF | |
| 828 else if (_isGff == false && _isVcf == true) { | |
| 829 printf ("%s\t%d\t", bed.chrom.c_str(), start+1); | |
| 830 | |
| 831 vector<string>::const_iterator othIt = bed.otherFields.begin(); | |
| 832 vector<string>::const_iterator othEnd = bed.otherFields.end(); | |
| 833 for ( ; othIt != othEnd; ++othIt) { | |
| 834 printf("%s\t", othIt->c_str()); | |
| 835 } | |
| 836 } | |
| 837 // GFF | |
| 838 else if (_isGff == true) { | |
| 839 // "GFF-8" | |
| 840 if (this->bedType == 8) { | |
| 841 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(), | |
| 842 bed.name.c_str(), start+1, end, | |
| 843 bed.score.c_str(), bed.strand.c_str(), | |
| 844 bed.otherFields[1].c_str()); | |
| 845 } | |
| 846 // "GFF-9" | |
| 847 else if (this->bedType == 9) { | |
| 848 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(), | |
| 849 bed.name.c_str(), start+1, end, | |
| 850 bed.score.c_str(), bed.strand.c_str(), | |
| 851 bed.otherFields[1].c_str(), bed.otherFields[2].c_str()); | |
| 852 } | |
| 853 } | |
| 854 } | |
| 855 | |
| 856 | |
| 857 | |
| 858 /* | |
| 859 reportBedNewLine | |
| 860 | |
| 861 Writes the _original_ BED entry with a NEWLINE | |
| 862 at the end of the line. | |
| 863 Works for BED3 - BED6. | |
| 864 */ | |
| 865 template <typename T> | |
| 866 inline void reportBedNewLine(const T &bed) { | |
| 867 | |
| 868 // if it is azeroLength feature, we need to | |
| 869 // correct the start and end coords to what they were | |
| 870 // in the original file | |
| 871 CHRPOS start = bed.start; | |
| 872 CHRPOS end = bed.end; | |
| 873 if (bed.zeroLength) { | |
| 874 if (_isGff == false) | |
| 875 start++; | |
| 876 end--; | |
| 877 } | |
| 878 | |
| 879 //BED | |
| 880 if (_isGff == false && _isVcf == false) { | |
| 881 if (this->bedType == 3) { | |
| 882 printf ("%s\t%d\t%d\n", bed.chrom.c_str(), start, end); | |
| 883 } | |
| 884 else if (this->bedType == 4) { | |
| 885 printf ("%s\t%d\t%d\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str()); | |
| 886 } | |
| 887 else if (this->bedType == 5) { | |
| 888 printf ("%s\t%d\t%d\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(), | |
| 889 bed.score.c_str()); | |
| 890 } | |
| 891 else if (this->bedType == 6) { | |
| 892 printf ("%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(), | |
| 893 bed.score.c_str(), bed.strand.c_str()); | |
| 894 } | |
| 895 else if (this->bedType > 6) { | |
| 896 printf ("%s\t%d\t%d\t%s\t%s\t%s", bed.chrom.c_str(), start, end, bed.name.c_str(), | |
| 897 bed.score.c_str(), bed.strand.c_str()); | |
| 898 | |
| 899 vector<string>::const_iterator othIt = bed.otherFields.begin(); | |
| 900 vector<string>::const_iterator othEnd = bed.otherFields.end(); | |
| 901 for ( ; othIt != othEnd; ++othIt) { | |
| 902 printf("\t%s", othIt->c_str()); | |
| 903 } | |
| 904 printf("\n"); | |
| 905 } | |
| 906 } | |
| 907 // VCF | |
| 908 else if (_isGff == false && _isVcf == true) { | |
| 909 printf ("%s\t%d\t", bed.chrom.c_str(), start+1); | |
| 910 | |
| 911 vector<string>::const_iterator othIt = bed.otherFields.begin(); | |
| 912 vector<string>::const_iterator othEnd = bed.otherFields.end(); | |
| 913 for ( ; othIt != othEnd; ++othIt) { | |
| 914 printf("%s\t", othIt->c_str()); | |
| 915 } | |
| 916 printf("\n"); | |
| 917 } | |
| 918 // GFF | |
| 919 else if (_isGff == true) { | |
| 920 // "GFF-8" | |
| 921 if (this->bedType == 8) { | |
| 922 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(), | |
| 923 bed.name.c_str(), start+1, end, | |
| 924 bed.score.c_str(), bed.strand.c_str(), | |
| 925 bed.otherFields[1].c_str()); | |
| 926 } | |
| 927 // "GFF-9" | |
| 928 else if (this->bedType == 9) { | |
| 929 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(), | |
| 930 bed.name.c_str(), start+1, end, | |
| 931 bed.score.c_str(), bed.strand.c_str(), | |
| 932 bed.otherFields[1].c_str(), bed.otherFields[2].c_str()); | |
| 933 } | |
| 934 } | |
| 935 } | |
| 936 | |
| 937 | |
| 938 | |
| 939 /* | |
| 940 reportBedRangeNewLine | |
| 941 | |
| 942 Writes a custom start->end for a BED entry | |
| 943 with a NEWLINE at the end of the line. | |
| 944 | |
| 945 Works for BED3 - BED6. | |
| 946 */ | |
| 947 template <typename T> | |
| 948 inline void reportBedRangeTab(const T &bed, CHRPOS start, CHRPOS end) { | |
| 949 | |
| 950 // if it is azeroLength feature, we need to | |
| 951 // correct the start and end coords to what they were | |
| 952 // in the original file | |
| 953 if (bed.zeroLength) { | |
| 954 start = bed.start + 1; | |
| 955 end = bed.end - 1; | |
| 956 } | |
| 957 // BED | |
| 958 if (_isGff == false && _isVcf == false) { | |
| 959 if (this->bedType == 3) { | |
| 960 printf ("%s\t%d\t%d\t", bed.chrom.c_str(), start, end); | |
| 961 } | |
| 962 else if (this->bedType == 4) { | |
| 963 printf ("%s\t%d\t%d\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str()); | |
| 964 } | |
| 965 else if (this->bedType == 5) { | |
| 966 printf ("%s\t%d\t%d\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(), | |
| 967 bed.score.c_str()); | |
| 968 } | |
| 969 else if (this->bedType == 6) { | |
| 970 printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(), | |
| 971 bed.score.c_str(), bed.strand.c_str()); | |
| 972 } | |
| 973 else if (this->bedType > 6) { | |
| 974 printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(), | |
| 975 bed.score.c_str(), bed.strand.c_str()); | |
| 976 | |
| 977 vector<string>::const_iterator othIt = bed.otherFields.begin(); | |
| 978 vector<string>::const_iterator othEnd = bed.otherFields.end(); | |
| 979 for ( ; othIt != othEnd; ++othIt) { | |
| 980 printf("%s\t", othIt->c_str()); | |
| 981 } | |
| 982 } | |
| 983 } | |
| 984 // VCF | |
| 985 else if (_isGff == false && _isVcf == true) { | |
| 986 printf ("%s\t%d\t", bed.chrom.c_str(), bed.start+1); | |
| 987 | |
| 988 vector<string>::const_iterator othIt = bed.otherFields.begin(); | |
| 989 vector<string>::const_iterator othEnd = bed.otherFields.end(); | |
| 990 for ( ; othIt != othEnd; ++othIt) { | |
| 991 printf("%s\t", othIt->c_str()); | |
| 992 } | |
| 993 } | |
| 994 // GFF | |
| 995 else if (_isGff == true) { | |
| 996 // "GFF-8" | |
| 997 if (this->bedType == 8) { | |
| 998 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(), | |
| 999 bed.name.c_str(), start+1, end, | |
| 1000 bed.score.c_str(), bed.strand.c_str(), | |
| 1001 bed.otherFields[1].c_str()); | |
| 1002 } | |
| 1003 // "GFF-9" | |
| 1004 else if (this->bedType == 9) { | |
| 1005 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(), | |
| 1006 bed.name.c_str(), start+1, end, | |
| 1007 bed.score.c_str(), bed.strand.c_str(), | |
| 1008 bed.otherFields[1].c_str(), bed.otherFields[2].c_str()); | |
| 1009 } | |
| 1010 } | |
| 1011 } | |
| 1012 | |
| 1013 | |
| 1014 | |
| 1015 /* | |
| 1016 reportBedRangeTab | |
| 1017 | |
| 1018 Writes a custom start->end for a BED entry | |
| 1019 with a TAB at the end of the line. | |
| 1020 | |
| 1021 Works for BED3 - BED6. | |
| 1022 */ | |
| 1023 template <typename T> | |
| 1024 inline void reportBedRangeNewLine(const T &bed, CHRPOS start, CHRPOS end) { | |
| 1025 | |
| 1026 // if it is azeroLength feature, we need to | |
| 1027 // correct the start and end coords to what they were | |
| 1028 // in the original file | |
| 1029 if (bed.zeroLength) { | |
| 1030 start = bed.start + 1; | |
| 1031 end = bed.end - 1; | |
| 1032 } | |
| 1033 // BED | |
| 1034 if (_isGff == false && _isVcf == false) { | |
| 1035 if (this->bedType == 3) { | |
| 1036 printf ("%s\t%d\t%d\n", bed.chrom.c_str(), start, end); | |
| 1037 } | |
| 1038 else if (this->bedType == 4) { | |
| 1039 printf ("%s\t%d\t%d\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str()); | |
| 1040 } | |
| 1041 else if (this->bedType == 5) { | |
| 1042 printf ("%s\t%d\t%d\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(), | |
| 1043 bed.score.c_str()); | |
| 1044 } | |
| 1045 else if (this->bedType == 6) { | |
| 1046 printf ("%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(), | |
| 1047 bed.score.c_str(), bed.strand.c_str()); | |
| 1048 } | |
| 1049 else if (this->bedType > 6) { | |
| 1050 printf ("%s\t%d\t%d\t%s\t%s\t%s", bed.chrom.c_str(), start, end, bed.name.c_str(), | |
| 1051 bed.score.c_str(), bed.strand.c_str()); | |
| 1052 | |
| 1053 vector<string>::const_iterator othIt = bed.otherFields.begin(); | |
| 1054 vector<string>::const_iterator othEnd = bed.otherFields.end(); | |
| 1055 for ( ; othIt != othEnd; ++othIt) { | |
| 1056 printf("\t%s", othIt->c_str()); | |
| 1057 } | |
| 1058 printf("\n"); | |
| 1059 } | |
| 1060 } | |
| 1061 // VCF | |
| 1062 else if (_isGff == false && _isVcf == true) { | |
| 1063 printf ("%s\t%d\t", bed.chrom.c_str(), bed.start+1); | |
| 1064 | |
| 1065 vector<string>::const_iterator othIt = bed.otherFields.begin(); | |
| 1066 vector<string>::const_iterator othEnd = bed.otherFields.end(); | |
| 1067 for ( ; othIt != othEnd; ++othIt) { | |
| 1068 printf("%s\t", othIt->c_str()); | |
| 1069 } | |
| 1070 printf("\n"); | |
| 1071 } | |
| 1072 // GFF | |
| 1073 else if (_isGff == true) { | |
| 1074 // "GFF-9" | |
| 1075 if (this->bedType == 8) { | |
| 1076 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(), | |
| 1077 bed.name.c_str(), start+1, end, | |
| 1078 bed.score.c_str(), bed.strand.c_str(), | |
| 1079 bed.otherFields[1].c_str()); | |
| 1080 } | |
| 1081 // "GFF-8" | |
| 1082 else if (this->bedType == 9) { | |
| 1083 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(), | |
| 1084 bed.name.c_str(), start+1, end, | |
| 1085 bed.score.c_str(), bed.strand.c_str(), | |
| 1086 bed.otherFields[1].c_str(), bed.otherFields[2].c_str()); | |
| 1087 } | |
| 1088 } | |
| 1089 } | |
| 1090 | |
| 1091 | |
| 1092 /* | |
| 1093 reportNullBedTab | |
| 1094 */ | |
| 1095 void reportNullBedTab() { | |
| 1096 | |
| 1097 if (_isGff == false && _isVcf == false) { | |
| 1098 if (this->bedType == 3) { | |
| 1099 printf (".\t-1\t-1\t"); | |
| 1100 } | |
| 1101 else if (this->bedType == 4) { | |
| 1102 printf (".\t-1\t-1\t.\t"); | |
| 1103 } | |
| 1104 else if (this->bedType == 5) { | |
| 1105 printf (".\t-1\t-1\t.\t-1\t"); | |
| 1106 } | |
| 1107 else if (this->bedType == 6) { | |
| 1108 printf (".\t-1\t-1\t.\t-1\t.\t"); | |
| 1109 } | |
| 1110 else if (this->bedType > 6) { | |
| 1111 printf (".\t-1\t-1\t.\t-1\t.\t"); | |
| 1112 for (unsigned int i = 6; i < this->bedType; ++i) { | |
| 1113 printf(".\t"); | |
| 1114 } | |
| 1115 } | |
| 1116 } | |
| 1117 else if (_isGff == true && _isVcf == false) { | |
| 1118 if (this->bedType == 8) { | |
| 1119 printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t"); | |
| 1120 } | |
| 1121 else if (this->bedType == 9) { | |
| 1122 printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t.\t"); | |
| 1123 } | |
| 1124 } | |
| 1125 } | |
| 1126 | |
| 1127 | |
| 1128 /* | |
| 1129 reportNullBedTab | |
| 1130 */ | |
| 1131 void reportNullBedNewLine() { | |
| 1132 | |
| 1133 if (_isGff == false && _isVcf == false) { | |
| 1134 if (this->bedType == 3) { | |
| 1135 printf (".\t-1\t-1\n"); | |
| 1136 } | |
| 1137 else if (this->bedType == 4) { | |
| 1138 printf (".\t-1\t-1\t.\n"); | |
| 1139 } | |
| 1140 else if (this->bedType == 5) { | |
| 1141 printf (".\t-1\t-1\t.\t-1\n"); | |
| 1142 } | |
| 1143 else if (this->bedType == 6) { | |
| 1144 printf (".\t-1\t-1\t.\t-1\t.\n"); | |
| 1145 } | |
| 1146 else if (this->bedType > 6) { | |
| 1147 printf (".\t-1\t-1\t.\t-1\t."); | |
| 1148 for (unsigned int i = 6; i < this->bedType; ++i) { | |
| 1149 printf("\t."); | |
| 1150 } | |
| 1151 printf("\n"); | |
| 1152 } | |
| 1153 } | |
| 1154 else if (_isGff == true && _isVcf == false) { | |
| 1155 if (this->bedType == 8) { | |
| 1156 printf (".\t.\t.\t-1\t-1\t-1\t.\t.\n"); | |
| 1157 } | |
| 1158 else if (this->bedType == 9) { | |
| 1159 printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t.\n"); | |
| 1160 } | |
| 1161 } | |
| 1162 } | |
| 1163 | |
| 1164 | |
| 1165 }; | |
| 1166 | |
| 1167 #endif /* BEDFILE_H */ |
