Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/bamToBed/bamToBed.cpp @ 0:dfcd8b6c1bda
Uploaded
| author | aaronquinlan |
|---|---|
| date | Thu, 03 Nov 2011 10:25:04 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfcd8b6c1bda |
|---|---|
| 1 /***************************************************************************** | |
| 2 bamToBed.cpp | |
| 3 | |
| 4 (c) 2009 - Aaron Quinlan | |
| 5 Hall Laboratory | |
| 6 Department of Biochemistry and Molecular Genetics | |
| 7 University of Virginia | |
| 8 aaronquinlan@gmail.com | |
| 9 | |
| 10 Licenced under the GNU General Public License 2.0 license. | |
| 11 ******************************************************************************/ | |
| 12 #include "version.h" | |
| 13 #include "api/BamReader.h" | |
| 14 #include "api/BamAux.h" | |
| 15 #include "BamAncillary.h" | |
| 16 #include "bedFile.h" | |
| 17 using namespace BamTools; | |
| 18 | |
| 19 #include <vector> | |
| 20 #include <algorithm> // for swap() | |
| 21 #include <iostream> | |
| 22 #include <fstream> | |
| 23 #include <stdlib.h> | |
| 24 | |
| 25 using namespace std; | |
| 26 | |
| 27 | |
| 28 // define our program name | |
| 29 #define PROGRAM_NAME "bamToBed" | |
| 30 | |
| 31 // define our parameter checking macro | |
| 32 #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) | |
| 33 | |
| 34 | |
| 35 // function declarations | |
| 36 void ShowHelp(void); | |
| 37 | |
| 38 void ConvertBamToBed(const string &bamFile, const bool &useEditDistance, const string &bamTag, | |
| 39 const bool &writeBed12, const bool &obeySplits, const string &color, const bool &useCigar); | |
| 40 void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance); | |
| 41 | |
| 42 void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, bool obeySplits, bool useCigar); | |
| 43 void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, string color = "255,0,0"); | |
| 44 void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, | |
| 45 const RefVector &refs, bool useEditDistance); | |
| 46 | |
| 47 void ParseCigarBed12(const vector<CigarOp> &cigar, vector<int> &blockStarts, | |
| 48 vector<int> &blockEnds, int &alignmentEnd); | |
| 49 string BuildCigarString(const vector<CigarOp> &cigar); | |
| 50 | |
| 51 bool IsCorrectMappingForBEDPE (const BamAlignment &bam); | |
| 52 | |
| 53 | |
| 54 int main(int argc, char* argv[]) { | |
| 55 | |
| 56 // our configuration variables | |
| 57 bool showHelp = false; | |
| 58 | |
| 59 // input files | |
| 60 string bamFile = "stdin"; | |
| 61 string color = "255,0,0"; | |
| 62 string tag = ""; | |
| 63 | |
| 64 bool haveBam = true; | |
| 65 bool haveColor = false; | |
| 66 bool haveOtherTag = false; | |
| 67 bool writeBedPE = false; | |
| 68 bool writeBed12 = false; | |
| 69 bool useEditDistance = false; | |
| 70 bool useAlignmentScore = false; | |
| 71 bool useCigar = false; | |
| 72 bool obeySplits = false; | |
| 73 | |
| 74 // check to see if we should print out some help | |
| 75 | |
| 76 for(int i = 1; i < argc; i++) { | |
| 77 int parameterLength = (int)strlen(argv[i]); | |
| 78 | |
| 79 if((PARAMETER_CHECK("-h", 2, parameterLength)) || | |
| 80 (PARAMETER_CHECK("--help", 5, parameterLength))) { | |
| 81 showHelp = true; | |
| 82 } | |
| 83 } | |
| 84 | |
| 85 if(showHelp) ShowHelp(); | |
| 86 | |
| 87 // do some parsing (all of these parameters require 2 strings) | |
| 88 for(int i = 1; i < argc; i++) { | |
| 89 | |
| 90 int parameterLength = (int)strlen(argv[i]); | |
| 91 | |
| 92 if(PARAMETER_CHECK("-i", 2, parameterLength)) { | |
| 93 if ((i+1) < argc) { | |
| 94 bamFile = argv[i + 1]; | |
| 95 i++; | |
| 96 } | |
| 97 } | |
| 98 else if(PARAMETER_CHECK("-bedpe", 6, parameterLength)) { | |
| 99 writeBedPE = true; | |
| 100 } | |
| 101 else if(PARAMETER_CHECK("-bed12", 6, parameterLength)) { | |
| 102 writeBed12 = true; | |
| 103 } | |
| 104 else if(PARAMETER_CHECK("-split", 6, parameterLength)) { | |
| 105 obeySplits = true; | |
| 106 } | |
| 107 else if(PARAMETER_CHECK("-ed", 3, parameterLength)) { | |
| 108 useEditDistance = true; | |
| 109 } | |
| 110 else if(PARAMETER_CHECK("-cigar", 6, parameterLength)) { | |
| 111 useCigar = true; | |
| 112 } | |
| 113 else if(PARAMETER_CHECK("-as", 3, parameterLength)) { | |
| 114 useAlignmentScore = true; | |
| 115 } | |
| 116 else if(PARAMETER_CHECK("-color", 6, parameterLength)) { | |
| 117 if ((i+1) < argc) { | |
| 118 haveColor = true; | |
| 119 color = argv[i + 1]; | |
| 120 i++; | |
| 121 } | |
| 122 } | |
| 123 else if(PARAMETER_CHECK("-tag", 4, parameterLength)) { | |
| 124 if ((i+1) < argc) { | |
| 125 haveOtherTag = true; | |
| 126 tag = argv[i + 1]; | |
| 127 i++; | |
| 128 } | |
| 129 } | |
| 130 else { | |
| 131 cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; | |
| 132 showHelp = true; | |
| 133 } | |
| 134 } | |
| 135 | |
| 136 // make sure we have an input files | |
| 137 if (haveBam == false) { | |
| 138 cerr << endl << "*****" << endl << "*****ERROR: Need -i (BAM) file. " << endl << "*****" << endl; | |
| 139 showHelp = true; | |
| 140 } | |
| 141 if (haveColor == true && writeBed12 == false) { | |
| 142 cerr << endl << "*****" << endl << "*****ERROR: Cannot use color without BED12. " << endl << "*****" << endl; | |
| 143 showHelp = true; | |
| 144 } | |
| 145 if (useEditDistance == true && obeySplits == true) { | |
| 146 cerr << endl << "*****" << endl << "*****ERROR: Cannot use -ed with -splits. Edit distances cannot be computed for each \'chunk\'." << endl << "*****" << endl; | |
| 147 showHelp = true; | |
| 148 } | |
| 149 if (useEditDistance == true && useCigar == true) { | |
| 150 cerr << endl << "*****" << endl << "*****ERROR: Cannot use -cigar with -splits. Not yet supported." << endl << "*****" << endl; | |
| 151 showHelp = true; | |
| 152 } | |
| 153 if (useEditDistance == true && haveOtherTag == true) { | |
| 154 cerr << endl << "*****" << endl << "*****ERROR: Cannot use -ed with -tag. Choose one or the other." << endl << "*****" << endl; | |
| 155 showHelp = true; | |
| 156 } | |
| 157 if (writeBedPE == true && haveOtherTag == true) { | |
| 158 cerr << endl << "*****" << endl << "*****ERROR: Cannot use -tag with -bedpe." << endl << "*****" << endl; | |
| 159 showHelp = true; | |
| 160 } | |
| 161 // if there are no problems, let's convert BAM to BED or BEDPE | |
| 162 if (!showHelp) { | |
| 163 if (writeBedPE == false) | |
| 164 ConvertBamToBed(bamFile, useEditDistance, tag, writeBed12, obeySplits, color, useCigar); // BED or "blocked BED" | |
| 165 else | |
| 166 ConvertBamToBedpe(bamFile, useEditDistance); // BEDPE | |
| 167 } | |
| 168 else { | |
| 169 ShowHelp(); | |
| 170 } | |
| 171 } | |
| 172 | |
| 173 | |
| 174 void ShowHelp(void) { | |
| 175 | |
| 176 cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; | |
| 177 | |
| 178 cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; | |
| 179 | |
| 180 cerr << "Summary: Converts BAM alignments to BED6 or BEDPE format." << endl << endl; | |
| 181 | |
| 182 cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bam> " << endl << endl; | |
| 183 | |
| 184 cerr << "Options: " << endl; | |
| 185 | |
| 186 cerr << "\t-bedpe\t" << "Write BEDPE format." << endl; | |
| 187 cerr << "\t\t- Requires BAM to be grouped or sorted by query." << endl << endl; | |
| 188 | |
| 189 cerr << "\t-bed12\t" << "Write \"blocked\" BED format (aka \"BED12\")." << endl << endl; | |
| 190 cerr << "\t\thttp://genome-test.cse.ucsc.edu/FAQ/FAQformat#format1" << endl << endl; | |
| 191 | |
| 192 cerr << "\t-split\t" << "Report \"split\" BAM alignments as separate BED entries." << endl << endl; | |
| 193 | |
| 194 cerr << "\t-ed\t" << "Use BAM edit distance (NM tag) for BED score." << endl; | |
| 195 cerr << "\t\t- Default for BED is to use mapping quality." << endl; | |
| 196 cerr << "\t\t- Default for BEDPE is to use the minimum of" << endl; | |
| 197 cerr << "\t\t the two mapping qualities for the pair." << endl; | |
| 198 cerr << "\t\t- When -ed is used with -bedpe, the total edit" << endl; | |
| 199 cerr << "\t\t distance from the two mates is reported." << endl << endl; | |
| 200 | |
| 201 cerr << "\t-tag\t" << "Use other NUMERIC BAM alignment tag for BED score." << endl; | |
| 202 cerr << "\t\t- Default for BED is to use mapping quality." << endl; | |
| 203 cerr << "\t\t Disallowed with BEDPE output." << endl << endl; | |
| 204 | |
| 205 cerr << "\t-color\t" << "An R,G,B string for the color used with BED12 format." << endl; | |
| 206 cerr << "\t\tDefault is (255,0,0)." << endl << endl; | |
| 207 | |
| 208 cerr << "\t-cigar\t" << "Add the CIGAR string to the BED entry as a 7th column." << endl << endl; | |
| 209 | |
| 210 | |
| 211 // end the program here | |
| 212 exit(1); | |
| 213 } | |
| 214 | |
| 215 | |
| 216 void ConvertBamToBed(const string &bamFile, const bool &useEditDistance, const string &bamTag, | |
| 217 const bool &writeBed12, const bool &obeySplits, const string &color, const bool &useCigar) { | |
| 218 // open the BAM file | |
| 219 BamReader reader; | |
| 220 reader.Open(bamFile); | |
| 221 | |
| 222 // get header & reference information | |
| 223 string header = reader.GetHeaderText(); | |
| 224 RefVector refs = reader.GetReferenceData(); | |
| 225 | |
| 226 // rip through the BAM file and convert each mapped entry to BED | |
| 227 BamAlignment bam; | |
| 228 while (reader.GetNextAlignment(bam)) { | |
| 229 if (bam.IsMapped() == true) { | |
| 230 if (writeBed12 == false) // BED | |
| 231 PrintBed(bam, refs, useEditDistance, bamTag, obeySplits, useCigar); | |
| 232 else //"blocked" BED | |
| 233 PrintBed12(bam, refs, useEditDistance, bamTag, color); | |
| 234 } | |
| 235 } | |
| 236 reader.Close(); | |
| 237 } | |
| 238 | |
| 239 | |
| 240 /* | |
| 241 Assumptions: | |
| 242 1. The BAM file is grouped/sorted by query name, | |
| 243 not alignment position | |
| 244 */ | |
| 245 void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance) { | |
| 246 // open the BAM file | |
| 247 BamReader reader; | |
| 248 reader.Open(bamFile); | |
| 249 | |
| 250 // get header & reference information | |
| 251 string header = reader.GetHeaderText(); | |
| 252 RefVector refs = reader.GetReferenceData(); | |
| 253 | |
| 254 // rip through the BAM file and convert each mapped entry to BEDPE | |
| 255 BamAlignment bam1, bam2; | |
| 256 while (reader.GetNextAlignment(bam1)) { | |
| 257 // the alignment must be paired | |
| 258 if (bam1.IsPaired() == true) { | |
| 259 // grab the second alignment for the pair. | |
| 260 reader.GetNextAlignment(bam2); | |
| 261 | |
| 262 // require that the alignments are from the same query | |
| 263 if (bam1.Name == bam2.Name) { | |
| 264 PrintBedPE(bam1, bam2, refs, useEditDistance); | |
| 265 } | |
| 266 else { | |
| 267 cerr << "*****ERROR: -bedpe requires BAM to be sorted/grouped by query name. " << endl; | |
| 268 exit(1); | |
| 269 } | |
| 270 } | |
| 271 } | |
| 272 reader.Close(); | |
| 273 } | |
| 274 | |
| 275 | |
| 276 void ParseCigarBed12(const vector<CigarOp> &cigar, vector<int> &blockStarts, vector<int> &blockLengths, unsigned int &alignmentEnd) { | |
| 277 | |
| 278 int currPosition = 0; | |
| 279 int blockLength = 0; | |
| 280 | |
| 281 // Rip through the CIGAR ops and figure out if there is more | |
| 282 // than one block for this alignment | |
| 283 vector<CigarOp>::const_iterator cigItr = cigar.begin(); | |
| 284 vector<CigarOp>::const_iterator cigEnd = cigar.end(); | |
| 285 for (; cigItr != cigEnd; ++cigItr) { | |
| 286 switch (cigItr->Type) { | |
| 287 case ('M') : | |
| 288 blockLength += cigItr->Length; | |
| 289 currPosition += cigItr->Length; | |
| 290 case ('I') : break; | |
| 291 case ('S') : break; | |
| 292 case ('D') : break; | |
| 293 blockLength += cigItr->Length; | |
| 294 currPosition += cigItr->Length; | |
| 295 case ('P') : break; | |
| 296 case ('N') : | |
| 297 blockStarts.push_back(currPosition + cigItr->Length); | |
| 298 blockLengths.push_back(blockLength); | |
| 299 currPosition += cigItr->Length; | |
| 300 blockLength = 0; | |
| 301 case ('H') : break; // for 'H' - do nothing, move to next op | |
| 302 default : | |
| 303 printf("ERROR: Invalid Cigar op type\n"); // shouldn't get here | |
| 304 exit(1); | |
| 305 } | |
| 306 } | |
| 307 // add the kast block and set the | |
| 308 // alignment end (i.e., relative to the start) | |
| 309 blockLengths.push_back(blockLength); | |
| 310 alignmentEnd = currPosition; | |
| 311 } | |
| 312 | |
| 313 | |
| 314 string BuildCigarString(const vector<CigarOp> &cigar) { | |
| 315 | |
| 316 stringstream cigarString; | |
| 317 | |
| 318 for (size_t i = 0; i < cigar.size(); ++i) { | |
| 319 //cerr << cigar[i].Type << " " << cigar[i].Length << endl; | |
| 320 switch (cigar[i].Type) { | |
| 321 case ('M') : | |
| 322 case ('I') : | |
| 323 case ('D') : | |
| 324 case ('N') : | |
| 325 case ('S') : | |
| 326 case ('H') : | |
| 327 case ('P') : | |
| 328 cigarString << cigar[i].Length << cigar[i].Type; | |
| 329 } | |
| 330 } | |
| 331 return cigarString.str(); | |
| 332 } | |
| 333 | |
| 334 | |
| 335 void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, bool obeySplits, bool useCigar) { | |
| 336 | |
| 337 // set the strand | |
| 338 string strand = "+"; | |
| 339 if (bam.IsReverseStrand() == true) strand = "-"; | |
| 340 | |
| 341 // set the name of the feature based on the sequence | |
| 342 string name = bam.Name; | |
| 343 if (bam.IsFirstMate() == true) name += "/1"; | |
| 344 if (bam.IsSecondMate() == true) name += "/2"; | |
| 345 | |
| 346 // get the unpadded (parm = false) end position based on the CIGAR | |
| 347 unsigned int alignmentEnd = bam.GetEndPosition(false, false); | |
| 348 | |
| 349 // report the entire BAM footprint as a single BED entry | |
| 350 if (obeySplits == false) { | |
| 351 // report the alignment in BED6 format. | |
| 352 if (useEditDistance == false && bamTag == "") { | |
| 353 printf("%s\t%d\t%d\t\%s\t%d\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position, | |
| 354 alignmentEnd, name.c_str(), bam.MapQuality, strand.c_str()); | |
| 355 } | |
| 356 else if (useEditDistance == true && bamTag == "") { | |
| 357 uint32_t editDistance; | |
| 358 if (bam.GetTag("NM", editDistance)) { | |
| 359 printf("%s\t%d\t%d\t\%s\t%u\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position, | |
| 360 alignmentEnd, name.c_str(), editDistance, strand.c_str()); | |
| 361 } | |
| 362 else { | |
| 363 cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n"; | |
| 364 exit(1); | |
| 365 } | |
| 366 } | |
| 367 else if (useEditDistance == false && bamTag != "") { | |
| 368 int32_t tagValue; | |
| 369 if (bam.GetTag(bamTag, tagValue)) { | |
| 370 printf("%s\t%d\t%d\t\%s\t%d\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position, | |
| 371 alignmentEnd, name.c_str(), tagValue, strand.c_str()); | |
| 372 } | |
| 373 else { | |
| 374 cerr << "The requested tag (" << bamTag << ") was not found in the BAM file. Exiting\n"; | |
| 375 exit(1); | |
| 376 } | |
| 377 } | |
| 378 | |
| 379 // does the user want CIGAR as well? | |
| 380 if (useCigar == false) { | |
| 381 printf("\n"); | |
| 382 } | |
| 383 else { | |
| 384 string cigar = BuildCigarString(bam.CigarData); | |
| 385 printf("\t%s\n", cigar.c_str()); | |
| 386 } | |
| 387 } | |
| 388 // Report each chunk of the BAM alignment as a discrete BED entry | |
| 389 // For example 10M100N10M would be reported as two seprate BED entries of length 10 | |
| 390 else { | |
| 391 vector<BED> bedBlocks; | |
| 392 // Load the alignment blocks in bam into the bedBlocks vector. | |
| 393 // Don't trigger a new block when a "D" (deletion) CIGAR op is found. | |
| 394 getBamBlocks(bam, refs, bedBlocks, false); | |
| 395 | |
| 396 vector<BED>::const_iterator bedItr = bedBlocks.begin(); | |
| 397 vector<BED>::const_iterator bedEnd = bedBlocks.end(); | |
| 398 for (; bedItr != bedEnd; ++bedItr) { | |
| 399 printf("%s\t%d\t%d\t\%s\t%d\t%s\n", refs.at(bam.RefID).RefName.c_str(), bedItr->start, | |
| 400 bedItr->end, name.c_str(), bam.MapQuality, strand.c_str()); | |
| 401 } | |
| 402 } | |
| 403 } | |
| 404 | |
| 405 | |
| 406 void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, string color) { | |
| 407 | |
| 408 // set the strand | |
| 409 string strand = "+"; | |
| 410 if (bam.IsReverseStrand()) strand = "-"; | |
| 411 | |
| 412 // set the name of the feature based on the sequence | |
| 413 string name = bam.Name; | |
| 414 if (bam.IsFirstMate()) name += "/1"; | |
| 415 if (bam.IsSecondMate()) name += "/2"; | |
| 416 | |
| 417 // parse the CIGAR string and figure out the alignment blocks | |
| 418 unsigned int alignmentEnd; | |
| 419 vector<int> blockLengths; | |
| 420 vector<int> blockStarts; | |
| 421 blockStarts.push_back(0); | |
| 422 | |
| 423 // extract the block starts and lengths from the CIGAR string | |
| 424 ParseCigarBed12(bam.CigarData, blockStarts, blockLengths, alignmentEnd); | |
| 425 alignmentEnd += bam.Position; | |
| 426 | |
| 427 // write BED6 portion | |
| 428 if (useEditDistance == false && bamTag == "") { | |
| 429 printf("%s\t%d\t%d\t\%s\t%d\t%s\t", refs.at(bam.RefID).RefName.c_str(), bam.Position, | |
| 430 alignmentEnd, name.c_str(), bam.MapQuality, strand.c_str()); | |
| 431 } | |
| 432 else if (useEditDistance == true && bamTag != "") { | |
| 433 uint32_t editDistance; | |
| 434 if (bam.GetTag("NM", editDistance)) { | |
| 435 printf("%s\t%d\t%d\t\%s\t%u\t%s\t", refs.at(bam.RefID).RefName.c_str(), bam.Position, | |
| 436 alignmentEnd, name.c_str(), editDistance, strand.c_str()); | |
| 437 } | |
| 438 else { | |
| 439 cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n"; | |
| 440 exit(1); | |
| 441 } | |
| 442 } | |
| 443 else if (useEditDistance == false && bamTag != "") { | |
| 444 int32_t tagValue; | |
| 445 if (bam.GetTag(bamTag, tagValue)) { | |
| 446 printf("%s\t%d\t%d\t\%s\t%d\t%s\t", refs.at(bam.RefID).RefName.c_str(), bam.Position, | |
| 447 alignmentEnd, name.c_str(), tagValue, strand.c_str()); | |
| 448 } | |
| 449 else { | |
| 450 cerr << "The requested tag (" << bamTag << ") was not found in the BAM file. Exiting\n"; | |
| 451 exit(1); | |
| 452 } | |
| 453 } | |
| 454 | |
| 455 // write the colors, etc. | |
| 456 printf("%d\t%d\t%s\t%d\t", bam.Position, alignmentEnd, color.c_str(), (int) blockStarts.size()); | |
| 457 | |
| 458 // now write the lengths portion | |
| 459 unsigned int b; | |
| 460 for (b = 0; b < blockLengths.size() - 1; ++b) { | |
| 461 printf("%d,", blockLengths[b]); | |
| 462 } | |
| 463 printf("%d\t", blockLengths[b]); | |
| 464 | |
| 465 // now write the starts portion | |
| 466 for (b = 0; b < blockStarts.size() - 1; ++b) { | |
| 467 printf("%d,", blockStarts[b]); | |
| 468 } | |
| 469 printf("%d\n", blockStarts[b]); | |
| 470 } | |
| 471 | |
| 472 | |
| 473 void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, bool useEditDistance) { | |
| 474 | |
| 475 // initialize BEDPE variables | |
| 476 string chrom1, chrom2, strand1, strand2; | |
| 477 int start1, start2, end1, end2; | |
| 478 uint32_t editDistance1, editDistance2; | |
| 479 start1 = start2 = end1 = end2 = -1; | |
| 480 chrom1 = chrom2 = strand1 = strand2 = "."; | |
| 481 editDistance1 = editDistance2 = 0; | |
| 482 uint16_t minMapQuality = 0; | |
| 483 | |
| 484 // extract relevant info for end 1 | |
| 485 if (bam1.IsMapped()) { | |
| 486 chrom1 = refs.at(bam1.RefID).RefName; | |
| 487 start1 = bam1.Position; | |
| 488 end1 = bam1.GetEndPosition(false); | |
| 489 strand1 = "+"; | |
| 490 if (bam1.IsReverseStrand()) strand1 = "-"; | |
| 491 | |
| 492 // extract the edit distance from the NM tag | |
| 493 // if possible. otherwise, complain. | |
| 494 if (useEditDistance == true && bam1.GetTag("NM", editDistance1) == false) { | |
| 495 cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n"; | |
| 496 exit(1); | |
| 497 } | |
| 498 } | |
| 499 | |
| 500 // extract relevant info for end 2 | |
| 501 if (bam2.IsMapped()) { | |
| 502 chrom2 = refs.at(bam2.RefID).RefName; | |
| 503 start2 = bam2.Position; | |
| 504 end2 = bam2.GetEndPosition(false); | |
| 505 strand2 = "+"; | |
| 506 if (bam2.IsReverseStrand()) strand2 = "-"; | |
| 507 | |
| 508 // extract the edit distance from the NM tag | |
| 509 // if possible. otherwise, complain. | |
| 510 if (useEditDistance == true && bam2.GetTag("NM", editDistance2) == false) { | |
| 511 cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n"; | |
| 512 exit(1); | |
| 513 } | |
| 514 } | |
| 515 | |
| 516 // swap the ends if necessary | |
| 517 if ( chrom1 > chrom2 || ((chrom1 == chrom2) && (start1 > start2)) ) { | |
| 518 swap(chrom1, chrom2); | |
| 519 swap(start1, start2); | |
| 520 swap(end1, end2); | |
| 521 swap(strand1, strand2); | |
| 522 } | |
| 523 | |
| 524 // report BEDPE using min mapQuality | |
| 525 if (useEditDistance == false) { | |
| 526 // compute the minimum mapping quality b/w the two ends of the pair. | |
| 527 if (bam1.IsMapped() == true && bam2.IsMapped() == true) | |
| 528 minMapQuality = min(bam1.MapQuality, bam2.MapQuality); | |
| 529 | |
| 530 printf("%s\t%d\t%d\t\%s\t%d\t%d\t%s\t%d\t%s\t%s\n", | |
| 531 chrom1.c_str(), start1, end1, chrom2.c_str(), start2, end2, | |
| 532 bam1.Name.c_str(), minMapQuality, strand1.c_str(), strand2.c_str()); | |
| 533 } | |
| 534 // report BEDPE using total edit distance | |
| 535 else { | |
| 536 uint16_t totalEditDistance = 0; | |
| 537 if (bam1.IsMapped() == true && bam2.IsMapped() == true) | |
| 538 totalEditDistance = editDistance1 + editDistance2; | |
| 539 else if (bam1.IsMapped() == true) | |
| 540 totalEditDistance = editDistance1; | |
| 541 else if (bam2.IsMapped() == true) | |
| 542 totalEditDistance = editDistance2; | |
| 543 | |
| 544 printf("%s\t%d\t%d\t\%s\t%d\t%d\t%s\t%d\t%s\t%s\n", | |
| 545 chrom1.c_str(), start1, end1, chrom2.c_str(), start2, end2, | |
| 546 bam1.Name.c_str(), totalEditDistance, strand1.c_str(), strand2.c_str()); | |
| 547 } | |
| 548 } | |
| 549 | |
| 550 | |
| 551 // deprecated. | |
| 552 bool IsCorrectMappingForBEDPE (const BamAlignment &bam) { | |
| 553 | |
| 554 if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize > 0) ) { | |
| 555 return true; | |
| 556 } | |
| 557 else if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize == 0) && bam.IsFirstMate() ) { | |
| 558 return true; | |
| 559 } | |
| 560 else if ( (bam.RefID != bam.MateRefID) && bam.IsFirstMate() ) { | |
| 561 return true; | |
| 562 } | |
| 563 else return false; | |
| 564 } |
