0
|
1 /*****************************************************************************
|
|
2 pairToBed.h
|
|
3
|
|
4 (c) 2009 - Aaron Quinlan
|
|
5 Hall Laboratory
|
|
6 Department of Biochemistry and Molecular Genetics
|
|
7 University of Virginia
|
|
8 aaronquinlan@gmail.com
|
|
9
|
|
10 Licenced under the GNU General Public License 2.0 license.
|
|
11 ******************************************************************************/
|
|
12 #ifndef INTERSECTBED_H
|
|
13 #define INTERSECTBED_H
|
|
14
|
|
15 #include "api/BamReader.h"
|
|
16 #include "api/BamWriter.h"
|
|
17 #include "api/BamAux.h"
|
|
18 using namespace BamTools;
|
|
19
|
|
20 #include "bedFile.h"
|
|
21 #include "bedFilePE.h"
|
|
22 #include <vector>
|
|
23 #include <iostream>
|
|
24 #include <fstream>
|
|
25
|
|
26 using namespace std;
|
|
27
|
|
28
|
|
29
|
|
30 /**************************************************
|
|
31 Helper function protoypes
|
|
32 **************************************************/
|
|
33 void IsCorrectMappingForBEDPE (const BamAlignment &bam, const RefVector &refs, BEDPE &a);
|
|
34
|
|
35
|
|
36
|
|
37 //************************************************
|
|
38 // Class methods and elements
|
|
39 //************************************************
|
|
40 class BedIntersectPE {
|
|
41
|
|
42 public:
|
|
43
|
|
44 // constructor
|
|
45 BedIntersectPE(string bedAFilePE, string bedBFile, float overlapFraction,
|
|
46 string searchType, bool sameStrand, bool diffStrand, bool bamInput, bool bamOutput, bool uncompressedBam, bool useEditDistance);
|
|
47 // destructor
|
|
48 ~BedIntersectPE(void);
|
|
49
|
|
50 void FindOverlaps(const BEDPE &, vector<BED> &hits1, vector<BED> &hits2, const string &type);
|
|
51
|
|
52 bool FindOneOrMoreOverlaps(const BEDPE &, const string &type);
|
|
53
|
|
54 void FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, const string &type);
|
|
55 bool FindOneOrMoreSpanningOverlaps(const BEDPE &a, const string &type);
|
|
56
|
|
57 void IntersectBedPE();
|
|
58 void IntersectBamPE(string bamFile);
|
|
59
|
|
60 void DetermineBedPEInput();
|
|
61
|
|
62 private:
|
|
63
|
|
64 string _bedAFilePE;
|
|
65 string _bedBFile;
|
|
66 float _overlapFraction;
|
|
67 string _searchType;
|
|
68 bool _sameStrand;
|
|
69 bool _diffStrand;
|
|
70 bool _useEditDistance;
|
|
71 bool _bamInput;
|
|
72 bool _bamOutput;
|
|
73 bool _isUncompressedBam;
|
|
74
|
|
75 // instance of a paired-end bed file class.
|
|
76 BedFilePE *_bedA;
|
|
77
|
|
78 // instance of a bed file class.
|
|
79 BedFile *_bedB;
|
|
80
|
|
81 inline
|
|
82 void ConvertBamToBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, BEDPE &a) {
|
|
83
|
|
84 // initialize BEDPE variables
|
|
85 a.start1 = a.start2 = a.end1 = a.end2 = -1;
|
|
86 a.chrom1 = a.chrom2 = ".";
|
|
87 a.strand1 = a.strand2 = '.';
|
|
88 uint32_t editDistance1, editDistance2;
|
|
89 editDistance1 = editDistance2 = 0;
|
|
90
|
|
91 // take the qname from end 1.
|
|
92 a.name = bam1.Name;
|
|
93
|
|
94 // end 1
|
|
95 if (bam1.IsMapped()) {
|
|
96 a.chrom1 = refs.at(bam1.RefID).RefName;
|
|
97 a.start1 = bam1.Position;
|
|
98 a.end1 = bam1.GetEndPosition(false, false);
|
|
99 a.strand1 = "+";
|
|
100 if (bam1.IsReverseStrand()) a.strand1 = "-";
|
|
101
|
|
102 // extract the edit distance from the NM tag
|
|
103 // if possible. otherwise, complain.
|
|
104 if (_useEditDistance == true) {
|
|
105 if (bam1.GetTag("NM", editDistance1) == false) {
|
|
106 cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n";
|
|
107 exit(1);
|
|
108 }
|
|
109 }
|
|
110 }
|
|
111
|
|
112 // end 2
|
|
113 if (bam2.IsMapped()) {
|
|
114 a.chrom2 = refs.at(bam2.RefID).RefName;
|
|
115 a.start2 = bam2.Position;
|
|
116 a.end2 = bam2.GetEndPosition(false, false);
|
|
117 a.strand2 = "+";
|
|
118 if (bam2.IsReverseStrand()) a.strand2 = "-";
|
|
119
|
|
120 // extract the edit distance from the NM tag
|
|
121 // if possible. otherwise, complain.
|
|
122 if (_useEditDistance == true) {
|
|
123 if (bam2.GetTag("NM", editDistance2) == false) {
|
|
124 cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n";
|
|
125 exit(1);
|
|
126 }
|
|
127 }
|
|
128 }
|
|
129
|
|
130 // swap the ends if necessary
|
|
131 if ( a.chrom1 > a.chrom2 || ((a.chrom1 == a.chrom2) && (a.start1 > a.start2)) ) {
|
|
132 swap(a.chrom1, a.chrom2);
|
|
133 swap(a.start1, a.start2);
|
|
134 swap(a.end1, a.end2);
|
|
135 swap(a.strand1, a.strand2);
|
|
136 }
|
|
137
|
|
138 // compute the minimum mapping quality b/w the two ends of the pair.
|
|
139 a.score = "0";
|
|
140 if (_useEditDistance == false) {
|
|
141 if (bam1.IsMapped() == true && bam2.IsMapped() == true)
|
|
142 a.score = ToString(min(bam1.MapQuality, bam2.MapQuality));
|
|
143 }
|
|
144 // BEDPE using edit distance
|
|
145 else {
|
|
146 if (bam1.IsMapped() == true && bam2.IsMapped() == true)
|
|
147 a.score = ToString((int) (editDistance1 + editDistance2));
|
|
148 else if (bam1.IsMapped() == true)
|
|
149 a.score = ToString((int) editDistance1);
|
|
150 else if (bam2.IsMapped() == true)
|
|
151 a.score = ToString((int) editDistance2);
|
|
152 }
|
|
153 };
|
|
154
|
|
155 inline
|
|
156 void ProcessBamBlock (const BamAlignment &bam1, const BamAlignment &bam2,
|
|
157 const RefVector &refs,
|
|
158 BamWriter &writer);
|
|
159 };
|
|
160
|
|
161 #endif /* PEINTERSECTBED_H */
|