0
|
1 /*****************************************************************************
|
|
2 bamToBed.cpp
|
|
3
|
|
4 (c) 2009 - Aaron Quinlan
|
|
5 Hall Laboratory
|
|
6 Department of Biochemistry and Molecular Genetics
|
|
7 University of Virginia
|
|
8 aaronquinlan@gmail.com
|
|
9
|
|
10 Licenced under the GNU General Public License 2.0 license.
|
|
11 ******************************************************************************/
|
|
12 #include "version.h"
|
|
13 #include "api/BamReader.h"
|
|
14 #include "api/BamAux.h"
|
|
15 #include "BamAncillary.h"
|
|
16 #include "bedFile.h"
|
|
17 using namespace BamTools;
|
|
18
|
|
19 #include <vector>
|
|
20 #include <algorithm> // for swap()
|
|
21 #include <iostream>
|
|
22 #include <fstream>
|
|
23 #include <stdlib.h>
|
|
24
|
|
25 using namespace std;
|
|
26
|
|
27
|
|
28 // define our program name
|
|
29 #define PROGRAM_NAME "bamToBed"
|
|
30
|
|
31 // define our parameter checking macro
|
|
32 #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
|
|
33
|
|
34
|
|
35 // function declarations
|
|
36 void ShowHelp(void);
|
|
37
|
|
38 void ConvertBamToBed(const string &bamFile, const bool &useEditDistance, const string &bamTag,
|
|
39 const bool &writeBed12, const bool &obeySplits, const string &color, const bool &useCigar);
|
|
40 void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance);
|
|
41
|
|
42 void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, bool obeySplits, bool useCigar);
|
|
43 void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, string color = "255,0,0");
|
|
44 void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2,
|
|
45 const RefVector &refs, bool useEditDistance);
|
|
46
|
|
47 void ParseCigarBed12(const vector<CigarOp> &cigar, vector<int> &blockStarts,
|
|
48 vector<int> &blockEnds, int &alignmentEnd);
|
|
49 string BuildCigarString(const vector<CigarOp> &cigar);
|
|
50
|
|
51 bool IsCorrectMappingForBEDPE (const BamAlignment &bam);
|
|
52
|
|
53
|
|
54 int main(int argc, char* argv[]) {
|
|
55
|
|
56 // our configuration variables
|
|
57 bool showHelp = false;
|
|
58
|
|
59 // input files
|
|
60 string bamFile = "stdin";
|
|
61 string color = "255,0,0";
|
|
62 string tag = "";
|
|
63
|
|
64 bool haveBam = true;
|
|
65 bool haveColor = false;
|
|
66 bool haveOtherTag = false;
|
|
67 bool writeBedPE = false;
|
|
68 bool writeBed12 = false;
|
|
69 bool useEditDistance = false;
|
|
70 bool useAlignmentScore = false;
|
|
71 bool useCigar = false;
|
|
72 bool obeySplits = false;
|
|
73
|
|
74 // check to see if we should print out some help
|
|
75
|
|
76 for(int i = 1; i < argc; i++) {
|
|
77 int parameterLength = (int)strlen(argv[i]);
|
|
78
|
|
79 if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
|
|
80 (PARAMETER_CHECK("--help", 5, parameterLength))) {
|
|
81 showHelp = true;
|
|
82 }
|
|
83 }
|
|
84
|
|
85 if(showHelp) ShowHelp();
|
|
86
|
|
87 // do some parsing (all of these parameters require 2 strings)
|
|
88 for(int i = 1; i < argc; i++) {
|
|
89
|
|
90 int parameterLength = (int)strlen(argv[i]);
|
|
91
|
|
92 if(PARAMETER_CHECK("-i", 2, parameterLength)) {
|
|
93 if ((i+1) < argc) {
|
|
94 bamFile = argv[i + 1];
|
|
95 i++;
|
|
96 }
|
|
97 }
|
|
98 else if(PARAMETER_CHECK("-bedpe", 6, parameterLength)) {
|
|
99 writeBedPE = true;
|
|
100 }
|
|
101 else if(PARAMETER_CHECK("-bed12", 6, parameterLength)) {
|
|
102 writeBed12 = true;
|
|
103 }
|
|
104 else if(PARAMETER_CHECK("-split", 6, parameterLength)) {
|
|
105 obeySplits = true;
|
|
106 }
|
|
107 else if(PARAMETER_CHECK("-ed", 3, parameterLength)) {
|
|
108 useEditDistance = true;
|
|
109 }
|
|
110 else if(PARAMETER_CHECK("-cigar", 6, parameterLength)) {
|
|
111 useCigar = true;
|
|
112 }
|
|
113 else if(PARAMETER_CHECK("-as", 3, parameterLength)) {
|
|
114 useAlignmentScore = true;
|
|
115 }
|
|
116 else if(PARAMETER_CHECK("-color", 6, parameterLength)) {
|
|
117 if ((i+1) < argc) {
|
|
118 haveColor = true;
|
|
119 color = argv[i + 1];
|
|
120 i++;
|
|
121 }
|
|
122 }
|
|
123 else if(PARAMETER_CHECK("-tag", 4, parameterLength)) {
|
|
124 if ((i+1) < argc) {
|
|
125 haveOtherTag = true;
|
|
126 tag = argv[i + 1];
|
|
127 i++;
|
|
128 }
|
|
129 }
|
|
130 else {
|
|
131 cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
|
|
132 showHelp = true;
|
|
133 }
|
|
134 }
|
|
135
|
|
136 // make sure we have an input files
|
|
137 if (haveBam == false) {
|
|
138 cerr << endl << "*****" << endl << "*****ERROR: Need -i (BAM) file. " << endl << "*****" << endl;
|
|
139 showHelp = true;
|
|
140 }
|
|
141 if (haveColor == true && writeBed12 == false) {
|
|
142 cerr << endl << "*****" << endl << "*****ERROR: Cannot use color without BED12. " << endl << "*****" << endl;
|
|
143 showHelp = true;
|
|
144 }
|
|
145 if (useEditDistance == true && obeySplits == true) {
|
|
146 cerr << endl << "*****" << endl << "*****ERROR: Cannot use -ed with -splits. Edit distances cannot be computed for each \'chunk\'." << endl << "*****" << endl;
|
|
147 showHelp = true;
|
|
148 }
|
|
149 if (useEditDistance == true && useCigar == true) {
|
|
150 cerr << endl << "*****" << endl << "*****ERROR: Cannot use -cigar with -splits. Not yet supported." << endl << "*****" << endl;
|
|
151 showHelp = true;
|
|
152 }
|
|
153 if (useEditDistance == true && haveOtherTag == true) {
|
|
154 cerr << endl << "*****" << endl << "*****ERROR: Cannot use -ed with -tag. Choose one or the other." << endl << "*****" << endl;
|
|
155 showHelp = true;
|
|
156 }
|
|
157 if (writeBedPE == true && haveOtherTag == true) {
|
|
158 cerr << endl << "*****" << endl << "*****ERROR: Cannot use -tag with -bedpe." << endl << "*****" << endl;
|
|
159 showHelp = true;
|
|
160 }
|
|
161 // if there are no problems, let's convert BAM to BED or BEDPE
|
|
162 if (!showHelp) {
|
|
163 if (writeBedPE == false)
|
|
164 ConvertBamToBed(bamFile, useEditDistance, tag, writeBed12, obeySplits, color, useCigar); // BED or "blocked BED"
|
|
165 else
|
|
166 ConvertBamToBedpe(bamFile, useEditDistance); // BEDPE
|
|
167 }
|
|
168 else {
|
|
169 ShowHelp();
|
|
170 }
|
|
171 }
|
|
172
|
|
173
|
|
174 void ShowHelp(void) {
|
|
175
|
|
176 cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
|
|
177
|
|
178 cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
|
|
179
|
|
180 cerr << "Summary: Converts BAM alignments to BED6 or BEDPE format." << endl << endl;
|
|
181
|
|
182 cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bam> " << endl << endl;
|
|
183
|
|
184 cerr << "Options: " << endl;
|
|
185
|
|
186 cerr << "\t-bedpe\t" << "Write BEDPE format." << endl;
|
|
187 cerr << "\t\t- Requires BAM to be grouped or sorted by query." << endl << endl;
|
|
188
|
|
189 cerr << "\t-bed12\t" << "Write \"blocked\" BED format (aka \"BED12\")." << endl << endl;
|
|
190 cerr << "\t\thttp://genome-test.cse.ucsc.edu/FAQ/FAQformat#format1" << endl << endl;
|
|
191
|
|
192 cerr << "\t-split\t" << "Report \"split\" BAM alignments as separate BED entries." << endl << endl;
|
|
193
|
|
194 cerr << "\t-ed\t" << "Use BAM edit distance (NM tag) for BED score." << endl;
|
|
195 cerr << "\t\t- Default for BED is to use mapping quality." << endl;
|
|
196 cerr << "\t\t- Default for BEDPE is to use the minimum of" << endl;
|
|
197 cerr << "\t\t the two mapping qualities for the pair." << endl;
|
|
198 cerr << "\t\t- When -ed is used with -bedpe, the total edit" << endl;
|
|
199 cerr << "\t\t distance from the two mates is reported." << endl << endl;
|
|
200
|
|
201 cerr << "\t-tag\t" << "Use other NUMERIC BAM alignment tag for BED score." << endl;
|
|
202 cerr << "\t\t- Default for BED is to use mapping quality." << endl;
|
|
203 cerr << "\t\t Disallowed with BEDPE output." << endl << endl;
|
|
204
|
|
205 cerr << "\t-color\t" << "An R,G,B string for the color used with BED12 format." << endl;
|
|
206 cerr << "\t\tDefault is (255,0,0)." << endl << endl;
|
|
207
|
|
208 cerr << "\t-cigar\t" << "Add the CIGAR string to the BED entry as a 7th column." << endl << endl;
|
|
209
|
|
210
|
|
211 // end the program here
|
|
212 exit(1);
|
|
213 }
|
|
214
|
|
215
|
|
216 void ConvertBamToBed(const string &bamFile, const bool &useEditDistance, const string &bamTag,
|
|
217 const bool &writeBed12, const bool &obeySplits, const string &color, const bool &useCigar) {
|
|
218 // open the BAM file
|
|
219 BamReader reader;
|
|
220 reader.Open(bamFile);
|
|
221
|
|
222 // get header & reference information
|
|
223 string header = reader.GetHeaderText();
|
|
224 RefVector refs = reader.GetReferenceData();
|
|
225
|
|
226 // rip through the BAM file and convert each mapped entry to BED
|
|
227 BamAlignment bam;
|
|
228 while (reader.GetNextAlignment(bam)) {
|
|
229 if (bam.IsMapped() == true) {
|
|
230 if (writeBed12 == false) // BED
|
|
231 PrintBed(bam, refs, useEditDistance, bamTag, obeySplits, useCigar);
|
|
232 else //"blocked" BED
|
|
233 PrintBed12(bam, refs, useEditDistance, bamTag, color);
|
|
234 }
|
|
235 }
|
|
236 reader.Close();
|
|
237 }
|
|
238
|
|
239
|
|
240 /*
|
|
241 Assumptions:
|
|
242 1. The BAM file is grouped/sorted by query name,
|
|
243 not alignment position
|
|
244 */
|
|
245 void ConvertBamToBedpe(const string &bamFile, const bool &useEditDistance) {
|
|
246 // open the BAM file
|
|
247 BamReader reader;
|
|
248 reader.Open(bamFile);
|
|
249
|
|
250 // get header & reference information
|
|
251 string header = reader.GetHeaderText();
|
|
252 RefVector refs = reader.GetReferenceData();
|
|
253
|
|
254 // rip through the BAM file and convert each mapped entry to BEDPE
|
|
255 BamAlignment bam1, bam2;
|
|
256 while (reader.GetNextAlignment(bam1)) {
|
|
257 // the alignment must be paired
|
|
258 if (bam1.IsPaired() == true) {
|
|
259 // grab the second alignment for the pair.
|
|
260 reader.GetNextAlignment(bam2);
|
|
261
|
|
262 // require that the alignments are from the same query
|
|
263 if (bam1.Name == bam2.Name) {
|
|
264 PrintBedPE(bam1, bam2, refs, useEditDistance);
|
|
265 }
|
|
266 else {
|
|
267 cerr << "*****ERROR: -bedpe requires BAM to be sorted/grouped by query name. " << endl;
|
|
268 exit(1);
|
|
269 }
|
|
270 }
|
|
271 }
|
|
272 reader.Close();
|
|
273 }
|
|
274
|
|
275
|
|
276 void ParseCigarBed12(const vector<CigarOp> &cigar, vector<int> &blockStarts, vector<int> &blockLengths, unsigned int &alignmentEnd) {
|
|
277
|
|
278 int currPosition = 0;
|
|
279 int blockLength = 0;
|
|
280
|
|
281 // Rip through the CIGAR ops and figure out if there is more
|
|
282 // than one block for this alignment
|
|
283 vector<CigarOp>::const_iterator cigItr = cigar.begin();
|
|
284 vector<CigarOp>::const_iterator cigEnd = cigar.end();
|
|
285 for (; cigItr != cigEnd; ++cigItr) {
|
|
286 switch (cigItr->Type) {
|
|
287 case ('M') :
|
|
288 blockLength += cigItr->Length;
|
|
289 currPosition += cigItr->Length;
|
|
290 case ('I') : break;
|
|
291 case ('S') : break;
|
|
292 case ('D') : break;
|
|
293 blockLength += cigItr->Length;
|
|
294 currPosition += cigItr->Length;
|
|
295 case ('P') : break;
|
|
296 case ('N') :
|
|
297 blockStarts.push_back(currPosition + cigItr->Length);
|
|
298 blockLengths.push_back(blockLength);
|
|
299 currPosition += cigItr->Length;
|
|
300 blockLength = 0;
|
|
301 case ('H') : break; // for 'H' - do nothing, move to next op
|
|
302 default :
|
|
303 printf("ERROR: Invalid Cigar op type\n"); // shouldn't get here
|
|
304 exit(1);
|
|
305 }
|
|
306 }
|
|
307 // add the kast block and set the
|
|
308 // alignment end (i.e., relative to the start)
|
|
309 blockLengths.push_back(blockLength);
|
|
310 alignmentEnd = currPosition;
|
|
311 }
|
|
312
|
|
313
|
|
314 string BuildCigarString(const vector<CigarOp> &cigar) {
|
|
315
|
|
316 stringstream cigarString;
|
|
317
|
|
318 for (size_t i = 0; i < cigar.size(); ++i) {
|
|
319 //cerr << cigar[i].Type << " " << cigar[i].Length << endl;
|
|
320 switch (cigar[i].Type) {
|
|
321 case ('M') :
|
|
322 case ('I') :
|
|
323 case ('D') :
|
|
324 case ('N') :
|
|
325 case ('S') :
|
|
326 case ('H') :
|
|
327 case ('P') :
|
|
328 cigarString << cigar[i].Length << cigar[i].Type;
|
|
329 }
|
|
330 }
|
|
331 return cigarString.str();
|
|
332 }
|
|
333
|
|
334
|
|
335 void PrintBed(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, bool obeySplits, bool useCigar) {
|
|
336
|
|
337 // set the strand
|
|
338 string strand = "+";
|
|
339 if (bam.IsReverseStrand() == true) strand = "-";
|
|
340
|
|
341 // set the name of the feature based on the sequence
|
|
342 string name = bam.Name;
|
|
343 if (bam.IsFirstMate() == true) name += "/1";
|
|
344 if (bam.IsSecondMate() == true) name += "/2";
|
|
345
|
|
346 // get the unpadded (parm = false) end position based on the CIGAR
|
|
347 unsigned int alignmentEnd = bam.GetEndPosition(false, false);
|
|
348
|
|
349 // report the entire BAM footprint as a single BED entry
|
|
350 if (obeySplits == false) {
|
|
351 // report the alignment in BED6 format.
|
|
352 if (useEditDistance == false && bamTag == "") {
|
|
353 printf("%s\t%d\t%d\t\%s\t%d\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position,
|
|
354 alignmentEnd, name.c_str(), bam.MapQuality, strand.c_str());
|
|
355 }
|
|
356 else if (useEditDistance == true && bamTag == "") {
|
|
357 uint32_t editDistance;
|
|
358 if (bam.GetTag("NM", editDistance)) {
|
|
359 printf("%s\t%d\t%d\t\%s\t%u\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position,
|
|
360 alignmentEnd, name.c_str(), editDistance, strand.c_str());
|
|
361 }
|
|
362 else {
|
|
363 cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n";
|
|
364 exit(1);
|
|
365 }
|
|
366 }
|
|
367 else if (useEditDistance == false && bamTag != "") {
|
|
368 int32_t tagValue;
|
|
369 if (bam.GetTag(bamTag, tagValue)) {
|
|
370 printf("%s\t%d\t%d\t\%s\t%d\t%s", refs.at(bam.RefID).RefName.c_str(), bam.Position,
|
|
371 alignmentEnd, name.c_str(), tagValue, strand.c_str());
|
|
372 }
|
|
373 else {
|
|
374 cerr << "The requested tag (" << bamTag << ") was not found in the BAM file. Exiting\n";
|
|
375 exit(1);
|
|
376 }
|
|
377 }
|
|
378
|
|
379 // does the user want CIGAR as well?
|
|
380 if (useCigar == false) {
|
|
381 printf("\n");
|
|
382 }
|
|
383 else {
|
|
384 string cigar = BuildCigarString(bam.CigarData);
|
|
385 printf("\t%s\n", cigar.c_str());
|
|
386 }
|
|
387 }
|
|
388 // Report each chunk of the BAM alignment as a discrete BED entry
|
|
389 // For example 10M100N10M would be reported as two seprate BED entries of length 10
|
|
390 else {
|
|
391 vector<BED> bedBlocks;
|
|
392 // Load the alignment blocks in bam into the bedBlocks vector.
|
|
393 // Don't trigger a new block when a "D" (deletion) CIGAR op is found.
|
|
394 getBamBlocks(bam, refs, bedBlocks, false);
|
|
395
|
|
396 vector<BED>::const_iterator bedItr = bedBlocks.begin();
|
|
397 vector<BED>::const_iterator bedEnd = bedBlocks.end();
|
|
398 for (; bedItr != bedEnd; ++bedItr) {
|
|
399 printf("%s\t%d\t%d\t\%s\t%d\t%s\n", refs.at(bam.RefID).RefName.c_str(), bedItr->start,
|
|
400 bedItr->end, name.c_str(), bam.MapQuality, strand.c_str());
|
|
401 }
|
|
402 }
|
|
403 }
|
|
404
|
|
405
|
|
406 void PrintBed12(const BamAlignment &bam, const RefVector &refs, bool useEditDistance, const string &bamTag, string color) {
|
|
407
|
|
408 // set the strand
|
|
409 string strand = "+";
|
|
410 if (bam.IsReverseStrand()) strand = "-";
|
|
411
|
|
412 // set the name of the feature based on the sequence
|
|
413 string name = bam.Name;
|
|
414 if (bam.IsFirstMate()) name += "/1";
|
|
415 if (bam.IsSecondMate()) name += "/2";
|
|
416
|
|
417 // parse the CIGAR string and figure out the alignment blocks
|
|
418 unsigned int alignmentEnd;
|
|
419 vector<int> blockLengths;
|
|
420 vector<int> blockStarts;
|
|
421 blockStarts.push_back(0);
|
|
422
|
|
423 // extract the block starts and lengths from the CIGAR string
|
|
424 ParseCigarBed12(bam.CigarData, blockStarts, blockLengths, alignmentEnd);
|
|
425 alignmentEnd += bam.Position;
|
|
426
|
|
427 // write BED6 portion
|
|
428 if (useEditDistance == false && bamTag == "") {
|
|
429 printf("%s\t%d\t%d\t\%s\t%d\t%s\t", refs.at(bam.RefID).RefName.c_str(), bam.Position,
|
|
430 alignmentEnd, name.c_str(), bam.MapQuality, strand.c_str());
|
|
431 }
|
|
432 else if (useEditDistance == true && bamTag != "") {
|
|
433 uint32_t editDistance;
|
|
434 if (bam.GetTag("NM", editDistance)) {
|
|
435 printf("%s\t%d\t%d\t\%s\t%u\t%s\t", refs.at(bam.RefID).RefName.c_str(), bam.Position,
|
|
436 alignmentEnd, name.c_str(), editDistance, strand.c_str());
|
|
437 }
|
|
438 else {
|
|
439 cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n";
|
|
440 exit(1);
|
|
441 }
|
|
442 }
|
|
443 else if (useEditDistance == false && bamTag != "") {
|
|
444 int32_t tagValue;
|
|
445 if (bam.GetTag(bamTag, tagValue)) {
|
|
446 printf("%s\t%d\t%d\t\%s\t%d\t%s\t", refs.at(bam.RefID).RefName.c_str(), bam.Position,
|
|
447 alignmentEnd, name.c_str(), tagValue, strand.c_str());
|
|
448 }
|
|
449 else {
|
|
450 cerr << "The requested tag (" << bamTag << ") was not found in the BAM file. Exiting\n";
|
|
451 exit(1);
|
|
452 }
|
|
453 }
|
|
454
|
|
455 // write the colors, etc.
|
|
456 printf("%d\t%d\t%s\t%d\t", bam.Position, alignmentEnd, color.c_str(), (int) blockStarts.size());
|
|
457
|
|
458 // now write the lengths portion
|
|
459 unsigned int b;
|
|
460 for (b = 0; b < blockLengths.size() - 1; ++b) {
|
|
461 printf("%d,", blockLengths[b]);
|
|
462 }
|
|
463 printf("%d\t", blockLengths[b]);
|
|
464
|
|
465 // now write the starts portion
|
|
466 for (b = 0; b < blockStarts.size() - 1; ++b) {
|
|
467 printf("%d,", blockStarts[b]);
|
|
468 }
|
|
469 printf("%d\n", blockStarts[b]);
|
|
470 }
|
|
471
|
|
472
|
|
473 void PrintBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, bool useEditDistance) {
|
|
474
|
|
475 // initialize BEDPE variables
|
|
476 string chrom1, chrom2, strand1, strand2;
|
|
477 int start1, start2, end1, end2;
|
|
478 uint32_t editDistance1, editDistance2;
|
|
479 start1 = start2 = end1 = end2 = -1;
|
|
480 chrom1 = chrom2 = strand1 = strand2 = ".";
|
|
481 editDistance1 = editDistance2 = 0;
|
|
482 uint16_t minMapQuality = 0;
|
|
483
|
|
484 // extract relevant info for end 1
|
|
485 if (bam1.IsMapped()) {
|
|
486 chrom1 = refs.at(bam1.RefID).RefName;
|
|
487 start1 = bam1.Position;
|
|
488 end1 = bam1.GetEndPosition(false);
|
|
489 strand1 = "+";
|
|
490 if (bam1.IsReverseStrand()) strand1 = "-";
|
|
491
|
|
492 // extract the edit distance from the NM tag
|
|
493 // if possible. otherwise, complain.
|
|
494 if (useEditDistance == true && bam1.GetTag("NM", editDistance1) == false) {
|
|
495 cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n";
|
|
496 exit(1);
|
|
497 }
|
|
498 }
|
|
499
|
|
500 // extract relevant info for end 2
|
|
501 if (bam2.IsMapped()) {
|
|
502 chrom2 = refs.at(bam2.RefID).RefName;
|
|
503 start2 = bam2.Position;
|
|
504 end2 = bam2.GetEndPosition(false);
|
|
505 strand2 = "+";
|
|
506 if (bam2.IsReverseStrand()) strand2 = "-";
|
|
507
|
|
508 // extract the edit distance from the NM tag
|
|
509 // if possible. otherwise, complain.
|
|
510 if (useEditDistance == true && bam2.GetTag("NM", editDistance2) == false) {
|
|
511 cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n";
|
|
512 exit(1);
|
|
513 }
|
|
514 }
|
|
515
|
|
516 // swap the ends if necessary
|
|
517 if ( chrom1 > chrom2 || ((chrom1 == chrom2) && (start1 > start2)) ) {
|
|
518 swap(chrom1, chrom2);
|
|
519 swap(start1, start2);
|
|
520 swap(end1, end2);
|
|
521 swap(strand1, strand2);
|
|
522 }
|
|
523
|
|
524 // report BEDPE using min mapQuality
|
|
525 if (useEditDistance == false) {
|
|
526 // compute the minimum mapping quality b/w the two ends of the pair.
|
|
527 if (bam1.IsMapped() == true && bam2.IsMapped() == true)
|
|
528 minMapQuality = min(bam1.MapQuality, bam2.MapQuality);
|
|
529
|
|
530 printf("%s\t%d\t%d\t\%s\t%d\t%d\t%s\t%d\t%s\t%s\n",
|
|
531 chrom1.c_str(), start1, end1, chrom2.c_str(), start2, end2,
|
|
532 bam1.Name.c_str(), minMapQuality, strand1.c_str(), strand2.c_str());
|
|
533 }
|
|
534 // report BEDPE using total edit distance
|
|
535 else {
|
|
536 uint16_t totalEditDistance = 0;
|
|
537 if (bam1.IsMapped() == true && bam2.IsMapped() == true)
|
|
538 totalEditDistance = editDistance1 + editDistance2;
|
|
539 else if (bam1.IsMapped() == true)
|
|
540 totalEditDistance = editDistance1;
|
|
541 else if (bam2.IsMapped() == true)
|
|
542 totalEditDistance = editDistance2;
|
|
543
|
|
544 printf("%s\t%d\t%d\t\%s\t%d\t%d\t%s\t%d\t%s\t%s\n",
|
|
545 chrom1.c_str(), start1, end1, chrom2.c_str(), start2, end2,
|
|
546 bam1.Name.c_str(), totalEditDistance, strand1.c_str(), strand2.c_str());
|
|
547 }
|
|
548 }
|
|
549
|
|
550
|
|
551 // deprecated.
|
|
552 bool IsCorrectMappingForBEDPE (const BamAlignment &bam) {
|
|
553
|
|
554 if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize > 0) ) {
|
|
555 return true;
|
|
556 }
|
|
557 else if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize == 0) && bam.IsFirstMate() ) {
|
|
558 return true;
|
|
559 }
|
|
560 else if ( (bam.RefID != bam.MateRefID) && bam.IsFirstMate() ) {
|
|
561 return true;
|
|
562 }
|
|
563 else return false;
|
|
564 }
|