comparison BEDTools-Version-2.14.3/src/annotateBed/annotateBed.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfcd8b6c1bda
1 /*****************************************************************************
2 annotateBed.cpp
3
4 (c) 2009 - Aaron Quinlan
5 Hall Laboratory
6 Department of Biochemistry and Molecular Genetics
7 University of Virginia
8 aaronquinlan@gmail.com
9
10 Licenced under the GNU General Public License 2.0 license.
11 ******************************************************************************/
12 #include "lineFileUtilities.h"
13 #include "annotateBed.h"
14
15 // build
16 BedAnnotate::BedAnnotate(const string &mainFile, const vector<string> &annoFileNames,
17 const vector<string> &annoTitles, bool sameStrand, bool diffStrand, bool reportCounts, bool reportBoth) :
18
19 _mainFile(mainFile),
20 _annoFileNames(annoFileNames),
21 _annoTitles(annoTitles),
22 _sameStrand(sameStrand),
23 _diffStrand(diffStrand),
24 _reportCounts(reportCounts),
25 _reportBoth(reportBoth)
26 {
27 _bed = new BedFile(_mainFile);
28 }
29
30
31 // destroy and delete the open file pointers
32 BedAnnotate::~BedAnnotate(void) {
33 delete _bed;
34 CloseAnnoFiles();
35 }
36
37
38 void BedAnnotate::OpenAnnoFiles() {
39 for (size_t i=0; i < _annoFileNames.size(); ++i) {
40 BedFile *file = new BedFile(_annoFileNames[i]);
41 file->Open();
42 _annoFiles.push_back(file);
43 }
44 }
45
46
47 void BedAnnotate::CloseAnnoFiles() {
48 for (size_t i=0; i < _annoFiles.size(); ++i) {
49 BedFile *file = _annoFiles[i];
50 delete file;
51 _annoFiles[i] = NULL;
52 }
53 }
54
55
56 void BedAnnotate::PrintHeader() {
57 // print a hash to indicate header and then write a tab
58 // for each field in the main file.
59 printf("#");
60 for (size_t i = 0; i < _bed->bedType; ++i)
61 printf("\t");
62
63 // now print the label for each file.
64 if (_reportBoth == false) {
65 for (size_t i = 0; i < _annoTitles.size(); ++i)
66 printf("%s\t", _annoTitles[i].c_str());
67 printf("\n");
68 }
69 else {
70 for (size_t i = 0; i < _annoTitles.size(); ++i)
71 printf("%s_cnt\t%s_pct", _annoTitles[i].c_str(), _annoTitles[i].c_str());
72 printf("\n");
73 }
74 }
75
76
77 void BedAnnotate::InitializeMainFile() {
78 // process each chromosome
79 masterBedCovListMap::iterator chromItr = _bed->bedCovListMap.begin();
80 masterBedCovListMap::iterator chromEnd = _bed->bedCovListMap.end();
81 for (; chromItr != chromEnd; ++chromItr) {
82 // for each chrom, process each bin
83 binsToBedCovLists::iterator binItr = chromItr->second.begin();
84 binsToBedCovLists::iterator binEnd = chromItr->second.end();
85 for (; binItr != binEnd; ++binItr) {
86 // initialize BEDCOVLIST in this chrom/bin
87 vector<BEDCOVLIST>::iterator bedItr = binItr->second.begin();
88 vector<BEDCOVLIST>::iterator bedEnd = binItr->second.end();
89 for (; bedItr != bedEnd; ++bedItr) {
90 // initialize the depthMaps, counts, etc. for each anno file.
91 for (size_t i = 0; i < _annoFiles.size(); ++i) {
92 map<unsigned int, DEPTH> dummy;
93 bedItr->depthMapList.push_back(dummy);
94 bedItr->counts.push_back(0);
95 bedItr->minOverlapStarts.push_back(INT_MAX);
96 }
97 }
98 }
99 }
100 }
101
102
103 void BedAnnotate::AnnotateBed() {
104
105 // load the "main" bed file into a map so
106 // that we can easily compare each annoFile to it for overlaps
107 _bed->loadBedCovListFileIntoMap();
108 // open the annotations files for processing;
109 OpenAnnoFiles();
110 // initialize counters, depths, etc. for the main file
111 InitializeMainFile();
112
113 // annotate the main file with the coverage from the annotation files.
114 for (size_t annoIndex = 0; annoIndex < _annoFiles.size(); ++annoIndex) {
115 // grab the current annotation file.
116 BedFile *anno = _annoFiles[annoIndex];
117 int lineNum = 0;
118 BED a, nullBed;
119 BedLineStatus bedStatus;
120
121 // process each entry in the current anno file
122 while ((bedStatus = anno->GetNextBed(a, lineNum)) != BED_INVALID) {
123 if (bedStatus == BED_VALID) {
124 _bed->countListHits(a, annoIndex, _sameStrand, _diffStrand);
125 a = nullBed;
126 }
127 }
128 }
129
130 // report the annotations of the main file from the anno file.
131 ReportAnnotations();
132 // close the annotations files;
133 CloseAnnoFiles();
134 }
135
136
137 void BedAnnotate::ReportAnnotations() {
138
139 if (_annoTitles.size() > 0) {
140 PrintHeader();
141 }
142
143 // process each chromosome
144 masterBedCovListMap::const_iterator chromItr = _bed->bedCovListMap.begin();
145 masterBedCovListMap::const_iterator chromEnd = _bed->bedCovListMap.end();
146 for (; chromItr != chromEnd; ++chromItr) {
147 // for each chrom, process each bin
148 binsToBedCovLists::const_iterator binItr = chromItr->second.begin();
149 binsToBedCovLists::const_iterator binEnd = chromItr->second.end();
150 for (; binItr != binEnd; ++binItr) {
151 // for each chrom & bin, compute and report
152 // the observed coverage for each feature
153 vector<BEDCOVLIST>::const_iterator bedItr = binItr->second.begin();
154 vector<BEDCOVLIST>::const_iterator bedEnd = binItr->second.end();
155 for (; bedItr != bedEnd; ++bedItr) {
156 // print the main BED entry.
157 _bed->reportBedTab(*bedItr);
158
159 // now report the coverage from each annotation file.
160 for (size_t i = 0; i < _annoFiles.size(); ++i) {
161 unsigned int totalLength = 0;
162 int zeroDepthCount = 0; // number of bases with zero depth
163 int depth = 0; // tracks the depth at the current base
164
165 // the start is either the first base in the feature OR
166 // the leftmost position of an overlapping feature. e.g. (s = start):
167 // A ----------
168 // B s ------------
169 int start = min(bedItr->minOverlapStarts[i], bedItr->start);
170
171 map<unsigned int, DEPTH>::const_iterator depthItr;
172 map<unsigned int, DEPTH>::const_iterator depthEnd;
173
174 // compute the coverage observed at each base in the feature marching from start to end.
175 for (CHRPOS pos = start+1; pos <= bedItr->end; pos++) {
176 // map pointer grabbing the starts and ends observed at this position
177 depthItr = bedItr->depthMapList[i].find(pos);
178 depthEnd = bedItr->depthMapList[i].end();
179
180 // increment coverage if starts observed at this position.
181 if (depthItr != depthEnd)
182 depth += depthItr->second.starts;
183 // update zero depth
184 if ((pos > bedItr->start) && (pos <= bedItr->end) && (depth == 0))
185 zeroDepthCount++;
186 // decrement coverage if ends observed at this position.
187 if (depthItr != depthEnd)
188 depth = depth - depthItr->second.ends;
189 }
190 // Summarize the coverage for the current interval,
191 CHRPOS length = bedItr->end - bedItr->start;
192 totalLength += length;
193 int nonZeroBases = (length - zeroDepthCount);
194 float fractCovered = (float) nonZeroBases / length;
195 if (_reportCounts == false && _reportBoth == false)
196 printf("%f\t", fractCovered);
197 else if (_reportCounts == true && _reportBoth == false)
198 printf("%d\t", bedItr->counts[i]);
199 else if (_reportCounts == false && _reportBoth == true)
200 printf("%d\t%f\t", bedItr->counts[i], fractCovered);
201 }
202 // print newline for next feature.
203 printf("\n");
204 }
205 }
206 }
207 }
208
209