annotate BEDTools-Version-2.14.3/src/pairToBed/pairToBed.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 pairToBed.cpp
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2009 - Aaron Quinlan
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 Licenced under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 #include "lineFileUtilities.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #include "pairToBed.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16 bool IsCorrectMappingForBEDPE (const BamAlignment &bam) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize > 0) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 else if ( (bam.RefID == bam.MateRefID) && (bam.InsertSize == 0) && bam.IsFirstMate() ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 else if ( (bam.RefID != bam.MateRefID) && bam.IsFirstMate() ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 else return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 Constructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36 BedIntersectPE::BedIntersectPE(string bedAFilePE, string bedBFile, float overlapFraction,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37 string searchType, bool sameStrand, bool diffStrand, bool bamInput,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38 bool bamOutput, bool uncompressedBam, bool useEditDistance) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40 _bedAFilePE = bedAFilePE;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41 _bedBFile = bedBFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42 _overlapFraction = overlapFraction;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43 _sameStrand = sameStrand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44 _diffStrand = diffStrand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45 _useEditDistance = useEditDistance;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46 _searchType = searchType;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47 _bamInput = bamInput;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48 _bamOutput = bamOutput;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49 _isUncompressedBam = uncompressedBam;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51 _bedA = new BedFilePE(bedAFilePE);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52 _bedB = new BedFile(bedBFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54 if (_bamInput == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55 IntersectBedPE();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57 IntersectBamPE(_bedAFilePE);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62 Destructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65 BedIntersectPE::~BedIntersectPE(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70 void BedIntersectPE::FindOverlaps(const BEDPE &a, vector<BED> &hits1, vector<BED> &hits2, const string &type) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72 // list of hits on each end of BEDPE
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73 // that exceed the requested overlap fraction
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74 vector<BED> qualityHits1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75 vector<BED> qualityHits2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77 // count of hits on each end of BEDPE
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78 // that exceed the requested overlap fraction
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79 int numOverlapsEnd1 = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80 int numOverlapsEnd2 = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 // make sure we have a valid chromosome before we search
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83 if (a.chrom1 != ".") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 // Find the quality hits between ***end1*** of the BEDPE and the B BED file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85 _bedB->FindOverlapsPerBin(a.chrom1, a.start1, a.end1, a.strand1, hits1, _sameStrand, _diffStrand);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 vector<BED>::const_iterator h = hits1.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88 vector<BED>::const_iterator hitsEnd = hits1.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89 for (; h != hitsEnd; ++h) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91 int s = max(a.start1, h->start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92 int e = min(a.end1, h->end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 int overlapBases = (e - s); // the number of overlapping bases b/w a and b
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 int aLength = (a.end1 - a.start1); // the length of a in b.p.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96 // is there enough overlap relative to the user's request? (default ~ 1bp)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97 if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98 numOverlapsEnd1++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100 if (type == "either") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 _bedA->reportBedPETab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102 _bedB->reportBedNewLine(*h);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105 qualityHits1.push_back(*h);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112 // make sure we have a valid chromosome before we search
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113 if (a.chrom2 != ".") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 // Now find the quality hits between ***end2*** of the BEDPE and the B BED file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115 _bedB->FindOverlapsPerBin(a.chrom2, a.start2, a.end2, a.strand2, hits2, _sameStrand, _diffStrand);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 vector<BED>::const_iterator h = hits2.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118 vector<BED>::const_iterator hitsEnd = hits2.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119 for (; h != hitsEnd; ++h) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121 int s = max(a.start2, h->start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122 int e = min(a.end2, h->end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123 int overlapBases = (e - s); // the number of overlapping bases b/w a and b
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124 int aLength = (a.end2 - a.start2); // the length of a in b.p.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126 // is there enough overlap relative to the user's request? (default ~ 1bp)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
127 if ( ( (float) overlapBases / (float) aLength ) >= _overlapFraction ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
128 numOverlapsEnd2++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
129
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
130 if (type == "either") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
131 _bedA->reportBedPETab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
132 _bedB->reportBedNewLine(*h);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
133 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
134 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
135 qualityHits2.push_back(*h);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
136 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
137 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
138 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
139 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
140
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
141 // Now report the hits depending on what the user has requested.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
142 if (type == "neither") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
143 if ( (numOverlapsEnd1 == 0) && (numOverlapsEnd2 == 0) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
144 _bedA->reportBedPENewLine(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
145 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
146 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
147 else if (type == "notboth") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
148 if ( (numOverlapsEnd1 == 0) && (numOverlapsEnd2 == 0) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
149 _bedA->reportBedPENewLine(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
150 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
151 else if ( (numOverlapsEnd1 > 0) && (numOverlapsEnd2 == 0) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
152 for (vector<BED>::iterator q = qualityHits1.begin(); q != qualityHits1.end(); ++q) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
153 _bedA->reportBedPETab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
154 _bedB->reportBedNewLine(*q);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
155 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
156 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
157 else if ( (numOverlapsEnd1 == 0) && (numOverlapsEnd2 > 0) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
158 for (vector<BED>::iterator q = qualityHits2.begin(); q != qualityHits2.end(); ++q) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
159 _bedA->reportBedPETab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
160 _bedB->reportBedNewLine(*q);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
161 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
162 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
163 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
164 else if (type == "xor") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
165 if ( (numOverlapsEnd1 > 0) && (numOverlapsEnd2 == 0) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
166 for (vector<BED>::iterator q = qualityHits1.begin(); q != qualityHits1.end(); ++q) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
167 _bedA->reportBedPETab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
168 _bedB->reportBedNewLine(*q);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
169 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
170 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
171 else if ( (numOverlapsEnd1 == 0) && (numOverlapsEnd2 > 0) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
172 for (vector<BED>::iterator q = qualityHits2.begin(); q != qualityHits2.end(); ++q) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
173 _bedA->reportBedPETab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
174 _bedB->reportBedNewLine(*q);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
175 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
176 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
177 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
178 else if (type == "both") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
179 if ( (numOverlapsEnd1 > 0) && (numOverlapsEnd2 > 0) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
180 for (vector<BED>::iterator q = qualityHits1.begin(); q != qualityHits1.end(); ++q) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
181 _bedA->reportBedPETab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
182 _bedB->reportBedNewLine(*q);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
183 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
184 for (vector<BED>::iterator q = qualityHits2.begin(); q != qualityHits2.end(); ++q) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
185 _bedA->reportBedPETab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
186 _bedB->reportBedNewLine(*q);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
187 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
188 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
189 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
190 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
191
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
192
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
193 bool BedIntersectPE::FindOneOrMoreOverlaps(const BEDPE &a, const string &type) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
194
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
195 // flags for the existence of hits on each end of BEDPE
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
196 // that exceed the requested overlap fraction
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
197 bool end1Found = false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
198 bool end2Found = false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
199
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
200 // Look for overlaps in end 1 assuming we have an aligned chromosome.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
201 if (a.chrom1 != ".") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
202 end1Found = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom1, a.start1, a.end1, a.strand1,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
203 _sameStrand, _diffStrand, _overlapFraction);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
204
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
205 // can we bail out without checking end2?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
206 if ((type == "either") && (end1Found == true)) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
207 else if ((type == "neither") && (end1Found == true)) return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
208 else if ((type == "notboth") && (end1Found == false)) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
209 else if ((type == "both") && (end1Found == false)) return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
210 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
211
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
212 // Now look for overlaps in end 2 assuming we have an aligned chromosome.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
213 if (a.chrom2 != ".") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
214 end2Found = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom2, a.start2, a.end2, a.strand2,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
215 _sameStrand, _diffStrand, _overlapFraction);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
216
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
217 if ((type == "either") && (end2Found == true)) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
218 else if ((type == "neither") && (end2Found == true)) return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
219 else if ((type == "notboth") && (end2Found == false)) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
220 else if ((type == "both") && (end2Found == false)) return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
221 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
222
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
223 // Now report the hits depending on what the user has requested.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
224 if (type == "notboth") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
225 if ( (end1Found == false) || (end2Found == false) ) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
226 else return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
227 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
228 else if (type == "either") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
229 if ( (end1Found == false) && (end2Found == false) ) return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
230 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
231 else if (type == "neither") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
232 if ( (end1Found == false) && (end2Found == false) ) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
233 else return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
234 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
235 else if (type == "xor") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
236 if ( (end1Found == true) && (end2Found == false) ) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
237 else if ( (end1Found == false) && (end2Found == true) ) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
238 else return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
239 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
240 else if (type == "both") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
241 if ( (end1Found == true) && (end2Found == true) ) return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
242 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
243 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
244 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
245 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
246
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
247
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
248 void BedIntersectPE::FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, const string &type) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
249
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
250 // count of hits on _between_ end of BEDPE
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
251 // that exceed the requested overlap fraction
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
252 int numOverlaps = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
253 CHRPOS spanStart = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
254 CHRPOS spanEnd = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
255 CHRPOS spanLength = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
256
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
257 if ((type == "ispan") || (type == "notispan")) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
258 spanStart = a.end1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
259 spanEnd = a.start2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
260 if (a.end1 > a.start2) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
261 spanStart = a.end2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
262 spanEnd = a.start1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
263 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
264 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
265 else if ((type == "ospan") || (type == "notospan")) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
266 spanStart = a.start1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
267 spanEnd = a.end2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
268 if (a.start1 > a.start2) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
269 spanStart = a.start2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
270 spanEnd = a.end1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
271 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
272 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
273 spanLength = spanEnd - spanStart;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
274
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
275 // get the hits for the span
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
276 _bedB->FindOverlapsPerBin(a.chrom1, spanStart, spanEnd, a.strand1, hits, _sameStrand, _diffStrand);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
277
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
278 vector<BED>::const_iterator h = hits.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
279 vector<BED>::const_iterator hitsEnd = hits.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
280 for (; h != hitsEnd; ++h) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
281
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
282 int s = max(spanStart, h->start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
283 int e = min(spanEnd, h->end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
284 int overlapBases = (e - s); // the number of overlapping bases b/w a and b
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
285 int spanLength = (spanEnd - spanStart); // the length of a in b.p.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
286
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
287 // is there enough overlap relative to the user's request? (default ~ 1bp)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
288 if ( ( (float) overlapBases / (float) spanLength ) >= _overlapFraction ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
289 numOverlaps++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
290 if ((type == "ispan") || (type == "ospan")) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
291 _bedA->reportBedPETab(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
292 _bedB->reportBedNewLine(*h);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
293 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
294 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
295 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
296
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
297 if ( ( (type == "notispan") || (type == "notospan") ) && numOverlaps == 0 ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
298 _bedA->reportBedPENewLine(a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
299 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
300 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
301
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
302
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
303 bool BedIntersectPE::FindOneOrMoreSpanningOverlaps(const BEDPE &a, const string &type) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
304
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
305 int spanStart = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
306 int spanEnd = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
307 int spanLength = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
308 bool overlapFound;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
309
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
310 if ((type == "ispan") || (type == "notispan")) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
311 spanStart = a.end1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
312 spanEnd = a.start2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
313 if (a.end1 > a.start2) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
314 spanStart = a.end2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
315 spanEnd = a.start1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
316 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
317 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
318 else if ((type == "ospan") || (type == "notospan")) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
319 spanStart = a.start1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
320 spanEnd = a.end2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
321 if (a.start1 > a.start2) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
322 spanStart = a.start2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
323 spanEnd = a.end1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
324 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
325 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
326 spanLength = spanEnd - spanStart;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
327
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
328 overlapFound = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom1, spanStart, spanEnd, a.strand1,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
329 _sameStrand, _diffStrand, _overlapFraction);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
330
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
331 return overlapFound;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
332 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
333
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
334
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
335 void BedIntersectPE::IntersectBedPE() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
336
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
337 // load the "B" bed file into a map so
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
338 // that we can easily compare "A" to it for overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
339 _bedB->loadBedFileIntoMap();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
340
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
341 int lineNum = 0; // current input line number
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
342 vector<BED> hits, hits1, hits2; // vector of potential hits
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
343
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
344 // reserve some space
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
345 hits.reserve(100);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
346 hits1.reserve(100);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
347 hits2.reserve(100);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
348
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
349 BEDPE a, nullBedPE;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
350 BedLineStatus bedStatus;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
351
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
352 _bedA->Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
353 while ((bedStatus = _bedA->GetNextBedPE(a, lineNum)) != BED_INVALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
354 if (bedStatus == BED_VALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
355 if ( (_searchType == "ispan") || (_searchType == "ospan") ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
356 (_searchType == "notispan") || (_searchType == "notospan") ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
357 if (a.chrom1 == a.chrom2) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
358 FindSpanningOverlaps(a, hits, _searchType);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
359 hits.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
360 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
361 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
362 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
363 FindOverlaps(a, hits1, hits2, _searchType);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
364 hits1.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
365 hits2.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
366 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
367 a = nullBedPE;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
368 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
369 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
370 _bedA->Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
371 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
372
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
373
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
374 void BedIntersectPE::IntersectBamPE(string bamFile) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
375
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
376 // load the "B" bed file into a map so
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
377 // that we can easily compare "A" to it for overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
378 _bedB->loadBedFileIntoMap();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
379
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
380 // open the BAM file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
381 BamReader reader;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
382 BamWriter writer;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
383 reader.Open(bamFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
384
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
385 // get header & reference information
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
386 string bamHeader = reader.GetHeaderText();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
387 RefVector refs = reader.GetReferenceData();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
388
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
389 // open a BAM output to stdout if we are writing BAM
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
390 if (_bamOutput == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
391 // set compression mode
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
392 BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
393 if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
394 writer.SetCompressionMode(compressionMode);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
395 // open our BAM writer
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
396 writer.Open("stdout", bamHeader, refs);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
397 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
398
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
399 // track the previous and current sequence
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
400 // names so that we can identify blocks of
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
401 // alignments for a given read ID.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
402 string prevName, currName;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
403 prevName = currName = "";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
404
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
405 vector<BamAlignment> alignments; // vector of BAM alignments for a given ID in a BAM file.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
406 alignments.reserve(100);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
407
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
408 _bedA->bedType = 10; // it's a full BEDPE given it's BAM
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
409
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
410 // rip through the BAM file and convert each mapped entry to BEDPE
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
411 BamAlignment bam1, bam2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
412 while (reader.GetNextAlignment(bam1)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
413 // the alignment must be paired
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
414 if (bam1.IsPaired() == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
415 // grab the second alignment for the pair.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
416 reader.GetNextAlignment(bam2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
417
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
418 // require that the alignments are from the same query
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
419 if (bam1.Name == bam2.Name) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
420 ProcessBamBlock(bam1, bam2, refs, writer);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
421 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
422 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
423 cerr << "*****ERROR: BAM must be sorted by query name. " << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
424 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
425 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
426 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
427 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
428 // close up
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
429 reader.Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
430 if (_bamOutput == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
431 writer.Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
432 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
433 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
434
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
435
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
436 void BedIntersectPE::ProcessBamBlock (const BamAlignment &bam1, const BamAlignment &bam2,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
437 const RefVector &refs, BamWriter &writer) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
438
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
439 vector<BED> hits, hits1, hits2; // vector of potential hits
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
440 hits.reserve(1000); // reserve some space
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
441 hits1.reserve(1000);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
442 hits2.reserve(1000);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
443
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
444 bool overlapsFound; // flag to indicate if overlaps were found
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
445
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
446 if ( (_searchType == "either") || (_searchType == "xor") ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
447 (_searchType == "both") || (_searchType == "notboth") ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
448 (_searchType == "neither") ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
449
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
450 // create a new BEDPE feature from the BAM alignments.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
451 BEDPE a;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
452 ConvertBamToBedPE(bam1, bam2, refs, a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
453 if (_bamOutput == true) { // BAM output
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
454 // write to BAM if correct hits found
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
455 overlapsFound = FindOneOrMoreOverlaps(a, _searchType);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
456 if (overlapsFound == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
457 writer.SaveAlignment(bam1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
458 writer.SaveAlignment(bam2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
459 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
460 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
461 else { // BEDPE output
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
462 FindOverlaps(a, hits1, hits2, _searchType);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
463 hits1.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
464 hits2.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
465 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
466 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
467 else if ( (_searchType == "ispan") || (_searchType == "ospan") ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
468 // only look for ispan and ospan when both ends are mapped.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
469 if (bam1.IsMapped() && bam2.IsMapped()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
470 // only do an inspan or outspan check if the alignment is intrachromosomal
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
471 if (bam1.RefID == bam2.RefID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
472 // create a new BEDPE feature from the BAM alignments.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
473 BEDPE a;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
474 ConvertBamToBedPE(bam1, bam2, refs, a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
475 if (_bamOutput == true) { // BAM output
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
476 // look for overlaps, and write to BAM if >=1 were found
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
477 overlapsFound = FindOneOrMoreSpanningOverlaps(a, _searchType);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
478 if (overlapsFound == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
479 writer.SaveAlignment(bam1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
480 writer.SaveAlignment(bam2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
481 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
482 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
483 else { // BEDPE output
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
484 FindSpanningOverlaps(a, hits, _searchType);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
485 hits.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
486 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
487 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
488 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
489 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
490 else if ( (_searchType == "notispan") || (_searchType == "notospan") ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
491 // only look for notispan and notospan when both ends are mapped.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
492 if (bam1.IsMapped() && bam2.IsMapped()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
493 // only do an inspan or outspan check if the alignment is intrachromosomal
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
494 if (bam1.RefID == bam2.RefID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
495 // create a new BEDPE feature from the BAM alignments.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
496 BEDPE a;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
497 ConvertBamToBedPE(bam1, bam2, refs, a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
498 if (_bamOutput == true) { // BAM output
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
499 // write to BAM if there were no overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
500 overlapsFound = FindOneOrMoreSpanningOverlaps(a, _searchType);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
501 if (overlapsFound == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
502 writer.SaveAlignment(bam1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
503 writer.SaveAlignment(bam2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
504 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
505 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
506 else { // BEDPE output
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
507 FindSpanningOverlaps(a, hits, _searchType);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
508 hits.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
509 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
510 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
511 // if inter-chromosomal or orphaned, we know it's not ispan and not ospan
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
512 else if (_bamOutput == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
513 writer.SaveAlignment(bam1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
514 writer.SaveAlignment(bam2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
515 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
516 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
517 // if both ends aren't mapped, we know that it's notispan and not ospan
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
518 else if (_bamOutput == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
519 writer.SaveAlignment(bam1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
520 writer.SaveAlignment(bam2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
521 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
522 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
523 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
524
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
525