Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/annotateBed/annotateBed.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfcd8b6c1bda |
---|---|
1 /***************************************************************************** | |
2 annotateBed.cpp | |
3 | |
4 (c) 2009 - Aaron Quinlan | |
5 Hall Laboratory | |
6 Department of Biochemistry and Molecular Genetics | |
7 University of Virginia | |
8 aaronquinlan@gmail.com | |
9 | |
10 Licenced under the GNU General Public License 2.0 license. | |
11 ******************************************************************************/ | |
12 #include "lineFileUtilities.h" | |
13 #include "annotateBed.h" | |
14 | |
15 // build | |
16 BedAnnotate::BedAnnotate(const string &mainFile, const vector<string> &annoFileNames, | |
17 const vector<string> &annoTitles, bool sameStrand, bool diffStrand, bool reportCounts, bool reportBoth) : | |
18 | |
19 _mainFile(mainFile), | |
20 _annoFileNames(annoFileNames), | |
21 _annoTitles(annoTitles), | |
22 _sameStrand(sameStrand), | |
23 _diffStrand(diffStrand), | |
24 _reportCounts(reportCounts), | |
25 _reportBoth(reportBoth) | |
26 { | |
27 _bed = new BedFile(_mainFile); | |
28 } | |
29 | |
30 | |
31 // destroy and delete the open file pointers | |
32 BedAnnotate::~BedAnnotate(void) { | |
33 delete _bed; | |
34 CloseAnnoFiles(); | |
35 } | |
36 | |
37 | |
38 void BedAnnotate::OpenAnnoFiles() { | |
39 for (size_t i=0; i < _annoFileNames.size(); ++i) { | |
40 BedFile *file = new BedFile(_annoFileNames[i]); | |
41 file->Open(); | |
42 _annoFiles.push_back(file); | |
43 } | |
44 } | |
45 | |
46 | |
47 void BedAnnotate::CloseAnnoFiles() { | |
48 for (size_t i=0; i < _annoFiles.size(); ++i) { | |
49 BedFile *file = _annoFiles[i]; | |
50 delete file; | |
51 _annoFiles[i] = NULL; | |
52 } | |
53 } | |
54 | |
55 | |
56 void BedAnnotate::PrintHeader() { | |
57 // print a hash to indicate header and then write a tab | |
58 // for each field in the main file. | |
59 printf("#"); | |
60 for (size_t i = 0; i < _bed->bedType; ++i) | |
61 printf("\t"); | |
62 | |
63 // now print the label for each file. | |
64 if (_reportBoth == false) { | |
65 for (size_t i = 0; i < _annoTitles.size(); ++i) | |
66 printf("%s\t", _annoTitles[i].c_str()); | |
67 printf("\n"); | |
68 } | |
69 else { | |
70 for (size_t i = 0; i < _annoTitles.size(); ++i) | |
71 printf("%s_cnt\t%s_pct", _annoTitles[i].c_str(), _annoTitles[i].c_str()); | |
72 printf("\n"); | |
73 } | |
74 } | |
75 | |
76 | |
77 void BedAnnotate::InitializeMainFile() { | |
78 // process each chromosome | |
79 masterBedCovListMap::iterator chromItr = _bed->bedCovListMap.begin(); | |
80 masterBedCovListMap::iterator chromEnd = _bed->bedCovListMap.end(); | |
81 for (; chromItr != chromEnd; ++chromItr) { | |
82 // for each chrom, process each bin | |
83 binsToBedCovLists::iterator binItr = chromItr->second.begin(); | |
84 binsToBedCovLists::iterator binEnd = chromItr->second.end(); | |
85 for (; binItr != binEnd; ++binItr) { | |
86 // initialize BEDCOVLIST in this chrom/bin | |
87 vector<BEDCOVLIST>::iterator bedItr = binItr->second.begin(); | |
88 vector<BEDCOVLIST>::iterator bedEnd = binItr->second.end(); | |
89 for (; bedItr != bedEnd; ++bedItr) { | |
90 // initialize the depthMaps, counts, etc. for each anno file. | |
91 for (size_t i = 0; i < _annoFiles.size(); ++i) { | |
92 map<unsigned int, DEPTH> dummy; | |
93 bedItr->depthMapList.push_back(dummy); | |
94 bedItr->counts.push_back(0); | |
95 bedItr->minOverlapStarts.push_back(INT_MAX); | |
96 } | |
97 } | |
98 } | |
99 } | |
100 } | |
101 | |
102 | |
103 void BedAnnotate::AnnotateBed() { | |
104 | |
105 // load the "main" bed file into a map so | |
106 // that we can easily compare each annoFile to it for overlaps | |
107 _bed->loadBedCovListFileIntoMap(); | |
108 // open the annotations files for processing; | |
109 OpenAnnoFiles(); | |
110 // initialize counters, depths, etc. for the main file | |
111 InitializeMainFile(); | |
112 | |
113 // annotate the main file with the coverage from the annotation files. | |
114 for (size_t annoIndex = 0; annoIndex < _annoFiles.size(); ++annoIndex) { | |
115 // grab the current annotation file. | |
116 BedFile *anno = _annoFiles[annoIndex]; | |
117 int lineNum = 0; | |
118 BED a, nullBed; | |
119 BedLineStatus bedStatus; | |
120 | |
121 // process each entry in the current anno file | |
122 while ((bedStatus = anno->GetNextBed(a, lineNum)) != BED_INVALID) { | |
123 if (bedStatus == BED_VALID) { | |
124 _bed->countListHits(a, annoIndex, _sameStrand, _diffStrand); | |
125 a = nullBed; | |
126 } | |
127 } | |
128 } | |
129 | |
130 // report the annotations of the main file from the anno file. | |
131 ReportAnnotations(); | |
132 // close the annotations files; | |
133 CloseAnnoFiles(); | |
134 } | |
135 | |
136 | |
137 void BedAnnotate::ReportAnnotations() { | |
138 | |
139 if (_annoTitles.size() > 0) { | |
140 PrintHeader(); | |
141 } | |
142 | |
143 // process each chromosome | |
144 masterBedCovListMap::const_iterator chromItr = _bed->bedCovListMap.begin(); | |
145 masterBedCovListMap::const_iterator chromEnd = _bed->bedCovListMap.end(); | |
146 for (; chromItr != chromEnd; ++chromItr) { | |
147 // for each chrom, process each bin | |
148 binsToBedCovLists::const_iterator binItr = chromItr->second.begin(); | |
149 binsToBedCovLists::const_iterator binEnd = chromItr->second.end(); | |
150 for (; binItr != binEnd; ++binItr) { | |
151 // for each chrom & bin, compute and report | |
152 // the observed coverage for each feature | |
153 vector<BEDCOVLIST>::const_iterator bedItr = binItr->second.begin(); | |
154 vector<BEDCOVLIST>::const_iterator bedEnd = binItr->second.end(); | |
155 for (; bedItr != bedEnd; ++bedItr) { | |
156 // print the main BED entry. | |
157 _bed->reportBedTab(*bedItr); | |
158 | |
159 // now report the coverage from each annotation file. | |
160 for (size_t i = 0; i < _annoFiles.size(); ++i) { | |
161 unsigned int totalLength = 0; | |
162 int zeroDepthCount = 0; // number of bases with zero depth | |
163 int depth = 0; // tracks the depth at the current base | |
164 | |
165 // the start is either the first base in the feature OR | |
166 // the leftmost position of an overlapping feature. e.g. (s = start): | |
167 // A ---------- | |
168 // B s ------------ | |
169 int start = min(bedItr->minOverlapStarts[i], bedItr->start); | |
170 | |
171 map<unsigned int, DEPTH>::const_iterator depthItr; | |
172 map<unsigned int, DEPTH>::const_iterator depthEnd; | |
173 | |
174 // compute the coverage observed at each base in the feature marching from start to end. | |
175 for (CHRPOS pos = start+1; pos <= bedItr->end; pos++) { | |
176 // map pointer grabbing the starts and ends observed at this position | |
177 depthItr = bedItr->depthMapList[i].find(pos); | |
178 depthEnd = bedItr->depthMapList[i].end(); | |
179 | |
180 // increment coverage if starts observed at this position. | |
181 if (depthItr != depthEnd) | |
182 depth += depthItr->second.starts; | |
183 // update zero depth | |
184 if ((pos > bedItr->start) && (pos <= bedItr->end) && (depth == 0)) | |
185 zeroDepthCount++; | |
186 // decrement coverage if ends observed at this position. | |
187 if (depthItr != depthEnd) | |
188 depth = depth - depthItr->second.ends; | |
189 } | |
190 // Summarize the coverage for the current interval, | |
191 CHRPOS length = bedItr->end - bedItr->start; | |
192 totalLength += length; | |
193 int nonZeroBases = (length - zeroDepthCount); | |
194 float fractCovered = (float) nonZeroBases / length; | |
195 if (_reportCounts == false && _reportBoth == false) | |
196 printf("%f\t", fractCovered); | |
197 else if (_reportCounts == true && _reportBoth == false) | |
198 printf("%d\t", bedItr->counts[i]); | |
199 else if (_reportCounts == false && _reportBoth == true) | |
200 printf("%d\t%f\t", bedItr->counts[i], fractCovered); | |
201 } | |
202 // print newline for next feature. | |
203 printf("\n"); | |
204 } | |
205 } | |
206 } | |
207 } | |
208 | |
209 |