annotate BEDTools-Version-2.14.3/src/pairToBed/pairToBed.h @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 pairToBed.h
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2009 - Aaron Quinlan
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 Licenced under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 #ifndef INTERSECTBED_H
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #define INTERSECTBED_H
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15 #include "api/BamReader.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16 #include "api/BamWriter.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 #include "api/BamAux.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 using namespace BamTools;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 #include "bedFile.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 #include "bedFilePE.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22 #include <vector>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 #include <iostream>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 #include <fstream>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26 using namespace std;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30 /**************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 Helper function protoypes
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 **************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33 void IsCorrectMappingForBEDPE (const BamAlignment &bam, const RefVector &refs, BEDPE &a);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37 //************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38 // Class methods and elements
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39 //************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40 class BedIntersectPE {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42 public:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44 // constructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45 BedIntersectPE(string bedAFilePE, string bedBFile, float overlapFraction,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46 string searchType, bool sameStrand, bool diffStrand, bool bamInput, bool bamOutput, bool uncompressedBam, bool useEditDistance);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47 // destructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48 ~BedIntersectPE(void);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50 void FindOverlaps(const BEDPE &, vector<BED> &hits1, vector<BED> &hits2, const string &type);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52 bool FindOneOrMoreOverlaps(const BEDPE &, const string &type);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54 void FindSpanningOverlaps(const BEDPE &a, vector<BED> &hits, const string &type);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55 bool FindOneOrMoreSpanningOverlaps(const BEDPE &a, const string &type);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57 void IntersectBedPE();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58 void IntersectBamPE(string bamFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60 void DetermineBedPEInput();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62 private:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64 string _bedAFilePE;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65 string _bedBFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66 float _overlapFraction;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67 string _searchType;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68 bool _sameStrand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69 bool _diffStrand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70 bool _useEditDistance;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71 bool _bamInput;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72 bool _bamOutput;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73 bool _isUncompressedBam;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75 // instance of a paired-end bed file class.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76 BedFilePE *_bedA;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78 // instance of a bed file class.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79 BedFile *_bedB;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81 inline
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 void ConvertBamToBedPE(const BamAlignment &bam1, const BamAlignment &bam2, const RefVector &refs, BEDPE &a) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 // initialize BEDPE variables
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85 a.start1 = a.start2 = a.end1 = a.end2 = -1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86 a.chrom1 = a.chrom2 = ".";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 a.strand1 = a.strand2 = '.';
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88 uint32_t editDistance1, editDistance2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89 editDistance1 = editDistance2 = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91 // take the qname from end 1.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92 a.name = bam1.Name;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 // end 1
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95 if (bam1.IsMapped()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96 a.chrom1 = refs.at(bam1.RefID).RefName;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97 a.start1 = bam1.Position;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98 a.end1 = bam1.GetEndPosition(false, false);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99 a.strand1 = "+";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100 if (bam1.IsReverseStrand()) a.strand1 = "-";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102 // extract the edit distance from the NM tag
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 // if possible. otherwise, complain.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104 if (_useEditDistance == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105 if (bam1.GetTag("NM", editDistance1) == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106 cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112 // end 2
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113 if (bam2.IsMapped()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 a.chrom2 = refs.at(bam2.RefID).RefName;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115 a.start2 = bam2.Position;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116 a.end2 = bam2.GetEndPosition(false, false);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 a.strand2 = "+";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118 if (bam2.IsReverseStrand()) a.strand2 = "-";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120 // extract the edit distance from the NM tag
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121 // if possible. otherwise, complain.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122 if (_useEditDistance == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123 if (bam2.GetTag("NM", editDistance2) == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124 cerr << "The edit distance tag (NM) was not found in the BAM file. Please disable -ed. Exiting\n";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
127 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
128 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
129
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
130 // swap the ends if necessary
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
131 if ( a.chrom1 > a.chrom2 || ((a.chrom1 == a.chrom2) && (a.start1 > a.start2)) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
132 swap(a.chrom1, a.chrom2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
133 swap(a.start1, a.start2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
134 swap(a.end1, a.end2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
135 swap(a.strand1, a.strand2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
136 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
137
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
138 // compute the minimum mapping quality b/w the two ends of the pair.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
139 a.score = "0";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
140 if (_useEditDistance == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
141 if (bam1.IsMapped() == true && bam2.IsMapped() == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
142 a.score = ToString(min(bam1.MapQuality, bam2.MapQuality));
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
143 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
144 // BEDPE using edit distance
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
145 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
146 if (bam1.IsMapped() == true && bam2.IsMapped() == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
147 a.score = ToString((int) (editDistance1 + editDistance2));
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
148 else if (bam1.IsMapped() == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
149 a.score = ToString((int) editDistance1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
150 else if (bam2.IsMapped() == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
151 a.score = ToString((int) editDistance2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
152 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
153 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
154
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
155 inline
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
156 void ProcessBamBlock (const BamAlignment &bam1, const BamAlignment &bam2,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
157 const RefVector &refs,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
158 BamWriter &writer);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
159 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
160
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
161 #endif /* PEINTERSECTBED_H */