annotate BEDTools-Version-2.14.3/src/coverageBed/coverageBed.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 coverageBed.cpp
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2009 - Aaron Quinlan
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 Licenced under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 #include "lineFileUtilities.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #include "coverageBed.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15 // build
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16 BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 bool writeHistogram, bool bamInput, bool obeySplits,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 bool eachBase, bool countsOnly) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 _bedAFile = bedAFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 _bedBFile = bedBFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 _bedA = new BedFile(bedAFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 _bedB = new BedFile(bedBFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26 _sameStrand = sameStrand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 _diffStrand = diffStrand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 _obeySplits = obeySplits;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29 _eachBase = eachBase;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30 _writeHistogram = writeHistogram;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 _bamInput = bamInput;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 _countsOnly = countsOnly;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35 if (_bamInput == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36 CollectCoverageBed();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38 CollectCoverageBam(_bedA->bedFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41 // destroy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42 BedCoverage::~BedCoverage(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43 delete _bedA;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44 delete _bedB;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48 void BedCoverage::CollectCoverageBed() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50 // load the "B" bed file into a map so
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51 // that we can easily compare "A" to it for overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52 _bedB->loadBedCovFileIntoMap();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54 int lineNum = 0; // current input line number
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55 BED a, nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56 BedLineStatus bedStatus;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58 _bedA->Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59 // process each entry in A
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60 while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61 if (bedStatus == BED_VALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62 // process the BED entry as a single block
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63 if (_obeySplits == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64 _bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65 // split the BED into discrete blocksand process each independently.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67 bedVector bedBlocks;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68 splitBedIntoBlocks(a, lineNum, bedBlocks);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70 // use countSplitHits to avoid over-counting each split chunk
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71 // as distinct read coverage.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72 _bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74 a = nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77 _bedA->Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79 // report the coverage (summary or histogram) for BED B.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80 if (_countsOnly == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81 ReportCounts();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83 ReportCoverage();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 void BedCoverage::CollectCoverageBam(string bamFile) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89 // load the "B" bed file into a map so
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90 // that we can easily compare "A" to it for overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91 _bedB->loadBedCovFileIntoMap();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 // open the BAM file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 BamReader reader;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95 reader.Open(bamFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97 // get header & reference information
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98 string header = reader.GetHeaderText();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99 RefVector refs = reader.GetReferenceData();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 // convert each aligned BAM entry to BED
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102 // and compute coverage on B
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 BamAlignment bam;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104 while (reader.GetNextAlignment(bam)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105 if (bam.IsMapped()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106 // treat the BAM alignment as a single "block"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 if (_obeySplits == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108 // construct a new BED entry from the current BAM alignment.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109 BED a;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110 a.chrom = refs.at(bam.RefID).RefName;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111 a.start = bam.Position;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112 a.end = bam.GetEndPosition(false, false);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113 a.strand = "+";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 if (bam.IsReverseStrand()) a.strand = "-";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116 _bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118 // split the BAM alignment into discrete blocks and
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119 // look for overlaps only within each block.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121 // vec to store the discrete BED "blocks" from a
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122 bedVector bedBlocks;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123 // since we are counting coverage, we do want to split blocks when a
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124 // deletion (D) CIGAR op is encountered (hence the true for the last parm)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125 getBamBlocks(bam, refs, bedBlocks, true);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126 // use countSplitHits to avoid over-counting each split chunk
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
127 // as distinct read coverage.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
128 _bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
129 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
130 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
131 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
132 // report the coverage (summary or histogram) for BED B.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
133 if (_countsOnly == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
134 ReportCounts();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
135 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
136 ReportCoverage();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
137 // close the BAM file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
138 reader.Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
139 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
140
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
141
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
142 void BedCoverage::ReportCounts() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
143
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
144
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
145 // process each chromosome
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
146 masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
147 masterBedCovMap::const_iterator chromEnd = _bedB->bedCovMap.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
148 for (; chromItr != chromEnd; ++chromItr)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
149 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
150 // for each chrom, process each bin
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
151 binsToBedCovs::const_iterator binItr = chromItr->second.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
152 binsToBedCovs::const_iterator binEnd = chromItr->second.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
153 for (; binItr != binEnd; ++binItr)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
154 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
155 // for each chrom & bin, compute and report
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
156 // the observed coverage for each feature
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
157 vector<BEDCOV>::const_iterator bedItr = binItr->second.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
158 vector<BEDCOV>::const_iterator bedEnd = binItr->second.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
159 for (; bedItr != bedEnd; ++bedItr)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
160 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
161 _bedB->reportBedTab(*bedItr);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
162 printf("%d\n", bedItr->count);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
163 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
164 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
165 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
166 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
167
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
168 void BedCoverage::ReportCoverage() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
169
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
170 map<unsigned int, unsigned int> allDepthHist;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
171 unsigned int totalLength = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
172
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
173 // process each chromosome
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
174 masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
175 masterBedCovMap::const_iterator chromEnd = _bedB->bedCovMap.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
176 for (; chromItr != chromEnd; ++chromItr)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
177 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
178 // for each chrom, process each bin
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
179 binsToBedCovs::const_iterator binItr = chromItr->second.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
180 binsToBedCovs::const_iterator binEnd = chromItr->second.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
181 for (; binItr != binEnd; ++binItr)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
182 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
183 // for each chrom & bin, compute and report
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
184 // the observed coverage for each feature
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
185 vector<BEDCOV>::const_iterator bedItr = binItr->second.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
186 vector<BEDCOV>::const_iterator bedEnd = binItr->second.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
187 for (; bedItr != bedEnd; ++bedItr)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
188 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
189 int zeroDepthCount = 0; // number of bases with zero depth
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
190 int depth = 0; // tracks the depth at the current base
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
191
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
192 // the start is either the first base in the feature OR
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
193 // the leftmost position of an overlapping feature. e.g. (s = start):
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
194 // A ----------
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
195 // B s ------------
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
196 int start = min(bedItr->minOverlapStart, bedItr->start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
197
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
198 // track the number of bases in the feature covered by
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
199 // 0, 1, 2, ... n features in A
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
200 map<unsigned int, unsigned int> depthHist;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
201 map<unsigned int, DEPTH>::const_iterator depthItr;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
202
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
203 // compute the coverage observed at each base in the feature marching from start to end.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
204 for (CHRPOS pos = start+1; pos <= bedItr->end; pos++)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
205 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
206 // map pointer grabbing the starts and ends observed at this position
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
207 depthItr = bedItr->depthMap.find(pos);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
208 // increment coverage if starts observed at this position.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
209 if (depthItr != bedItr->depthMap.end())
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
210 depth += depthItr->second.starts;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
211 // update coverage assuming the current position is within the current B feature
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
212 if ((pos > bedItr->start) && (pos <= bedItr->end)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
213 if (depth == 0) zeroDepthCount++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
214 // update our histograms, assuming we are not reporting "per-base" coverage.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
215 if (_eachBase == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
216 depthHist[depth]++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
217 allDepthHist[depth]++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
218 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
219 else if ((_eachBase == true) && (bedItr->zeroLength == false))
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
220 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
221 _bedB->reportBedTab(*bedItr);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
222 printf("%d\t%d\n", pos-bedItr->start, depth);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
223 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
224 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
225 // decrement coverage if ends observed at this position.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
226 if (depthItr != bedItr->depthMap.end())
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
227 depth = depth - depthItr->second.ends;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
228 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
229
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
230 // handle the special case where the user wants "per-base" depth
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
231 // but the current feature is length = 0.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
232 if ((_eachBase == true) && (bedItr->zeroLength == true)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
233 _bedB->reportBedTab(*bedItr);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
234 printf("1\t%d\n",depth);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
235 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
236 // Summarize the coverage for the current interval,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
237 // assuming the user has not requested "per-base" coverage.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
238 else if (_eachBase == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
239 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
240 CHRPOS length = bedItr->end - bedItr->start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
241 if (bedItr->zeroLength == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
242 length = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
243 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
244 totalLength += length;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
245 int nonZeroBases = (length - zeroDepthCount);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
246
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
247 float fractCovered = 0.0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
248 if (bedItr->zeroLength == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
249 fractCovered = (float) nonZeroBases / length;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
250 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
251
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
252 // print a summary of the coverage
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
253 if (_writeHistogram == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
254 _bedB->reportBedTab(*bedItr);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
255 printf("%d\t%d\t%d\t%0.7f\n", bedItr->count, nonZeroBases, length, fractCovered);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
256 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
257 // HISTOGRAM
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
258 // report the number of bases with coverage == x
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
259 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
260 // produce a histogram when not a zero length feature.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
261 if (bedItr->zeroLength == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
262 map<unsigned int, unsigned int>::const_iterator histItr = depthHist.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
263 map<unsigned int, unsigned int>::const_iterator histEnd = depthHist.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
264 for (; histItr != histEnd; ++histItr)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
265 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
266 float fractAtThisDepth = (float) histItr->second / length;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
267 _bedB->reportBedTab(*bedItr);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
268 printf("%d\t%d\t%d\t%0.7f\n", histItr->first, histItr->second, length, fractAtThisDepth);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
269 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
270 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
271 // special case when it is a zero length feauture.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
272 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
273 _bedB->reportBedTab(*bedItr);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
274 printf("%d\t%d\t%d\t%0.7f\n", bedItr->count, 0, 0, 1.0000000);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
275 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
276 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
277 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
278 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
279 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
280 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
281 // report a histogram of coverage among _all_
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
282 // features in B.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
283 if (_writeHistogram == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
284 map<unsigned int, unsigned int>::const_iterator histItr = allDepthHist.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
285 map<unsigned int, unsigned int>::const_iterator histEnd = allDepthHist.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
286 for (; histItr != histEnd; ++histItr) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
287 float fractAtThisDepth = (float) histItr->second / totalLength;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
288 printf("all\t%d\t%d\t%d\t%0.7f\n", histItr->first, histItr->second, totalLength, fractAtThisDepth);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
289 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
290 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
291 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
292
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
293