Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/annotateBed/annotateBed.cpp @ 1:bec36315bd12 default tip
Deleted selected files
| author | aaronquinlan |
|---|---|
| date | Sat, 19 Nov 2011 14:17:03 -0500 |
| parents | dfcd8b6c1bda |
| children |
comparison
equal
deleted
inserted
replaced
| 0:dfcd8b6c1bda | 1:bec36315bd12 |
|---|---|
| 1 /***************************************************************************** | |
| 2 annotateBed.cpp | |
| 3 | |
| 4 (c) 2009 - Aaron Quinlan | |
| 5 Hall Laboratory | |
| 6 Department of Biochemistry and Molecular Genetics | |
| 7 University of Virginia | |
| 8 aaronquinlan@gmail.com | |
| 9 | |
| 10 Licenced under the GNU General Public License 2.0 license. | |
| 11 ******************************************************************************/ | |
| 12 #include "lineFileUtilities.h" | |
| 13 #include "annotateBed.h" | |
| 14 | |
| 15 // build | |
| 16 BedAnnotate::BedAnnotate(const string &mainFile, const vector<string> &annoFileNames, | |
| 17 const vector<string> &annoTitles, bool sameStrand, bool diffStrand, bool reportCounts, bool reportBoth) : | |
| 18 | |
| 19 _mainFile(mainFile), | |
| 20 _annoFileNames(annoFileNames), | |
| 21 _annoTitles(annoTitles), | |
| 22 _sameStrand(sameStrand), | |
| 23 _diffStrand(diffStrand), | |
| 24 _reportCounts(reportCounts), | |
| 25 _reportBoth(reportBoth) | |
| 26 { | |
| 27 _bed = new BedFile(_mainFile); | |
| 28 } | |
| 29 | |
| 30 | |
| 31 // destroy and delete the open file pointers | |
| 32 BedAnnotate::~BedAnnotate(void) { | |
| 33 delete _bed; | |
| 34 CloseAnnoFiles(); | |
| 35 } | |
| 36 | |
| 37 | |
| 38 void BedAnnotate::OpenAnnoFiles() { | |
| 39 for (size_t i=0; i < _annoFileNames.size(); ++i) { | |
| 40 BedFile *file = new BedFile(_annoFileNames[i]); | |
| 41 file->Open(); | |
| 42 _annoFiles.push_back(file); | |
| 43 } | |
| 44 } | |
| 45 | |
| 46 | |
| 47 void BedAnnotate::CloseAnnoFiles() { | |
| 48 for (size_t i=0; i < _annoFiles.size(); ++i) { | |
| 49 BedFile *file = _annoFiles[i]; | |
| 50 delete file; | |
| 51 _annoFiles[i] = NULL; | |
| 52 } | |
| 53 } | |
| 54 | |
| 55 | |
| 56 void BedAnnotate::PrintHeader() { | |
| 57 // print a hash to indicate header and then write a tab | |
| 58 // for each field in the main file. | |
| 59 printf("#"); | |
| 60 for (size_t i = 0; i < _bed->bedType; ++i) | |
| 61 printf("\t"); | |
| 62 | |
| 63 // now print the label for each file. | |
| 64 if (_reportBoth == false) { | |
| 65 for (size_t i = 0; i < _annoTitles.size(); ++i) | |
| 66 printf("%s\t", _annoTitles[i].c_str()); | |
| 67 printf("\n"); | |
| 68 } | |
| 69 else { | |
| 70 for (size_t i = 0; i < _annoTitles.size(); ++i) | |
| 71 printf("%s_cnt\t%s_pct", _annoTitles[i].c_str(), _annoTitles[i].c_str()); | |
| 72 printf("\n"); | |
| 73 } | |
| 74 } | |
| 75 | |
| 76 | |
| 77 void BedAnnotate::InitializeMainFile() { | |
| 78 // process each chromosome | |
| 79 masterBedCovListMap::iterator chromItr = _bed->bedCovListMap.begin(); | |
| 80 masterBedCovListMap::iterator chromEnd = _bed->bedCovListMap.end(); | |
| 81 for (; chromItr != chromEnd; ++chromItr) { | |
| 82 // for each chrom, process each bin | |
| 83 binsToBedCovLists::iterator binItr = chromItr->second.begin(); | |
| 84 binsToBedCovLists::iterator binEnd = chromItr->second.end(); | |
| 85 for (; binItr != binEnd; ++binItr) { | |
| 86 // initialize BEDCOVLIST in this chrom/bin | |
| 87 vector<BEDCOVLIST>::iterator bedItr = binItr->second.begin(); | |
| 88 vector<BEDCOVLIST>::iterator bedEnd = binItr->second.end(); | |
| 89 for (; bedItr != bedEnd; ++bedItr) { | |
| 90 // initialize the depthMaps, counts, etc. for each anno file. | |
| 91 for (size_t i = 0; i < _annoFiles.size(); ++i) { | |
| 92 map<unsigned int, DEPTH> dummy; | |
| 93 bedItr->depthMapList.push_back(dummy); | |
| 94 bedItr->counts.push_back(0); | |
| 95 bedItr->minOverlapStarts.push_back(INT_MAX); | |
| 96 } | |
| 97 } | |
| 98 } | |
| 99 } | |
| 100 } | |
| 101 | |
| 102 | |
| 103 void BedAnnotate::AnnotateBed() { | |
| 104 | |
| 105 // load the "main" bed file into a map so | |
| 106 // that we can easily compare each annoFile to it for overlaps | |
| 107 _bed->loadBedCovListFileIntoMap(); | |
| 108 // open the annotations files for processing; | |
| 109 OpenAnnoFiles(); | |
| 110 // initialize counters, depths, etc. for the main file | |
| 111 InitializeMainFile(); | |
| 112 | |
| 113 // annotate the main file with the coverage from the annotation files. | |
| 114 for (size_t annoIndex = 0; annoIndex < _annoFiles.size(); ++annoIndex) { | |
| 115 // grab the current annotation file. | |
| 116 BedFile *anno = _annoFiles[annoIndex]; | |
| 117 int lineNum = 0; | |
| 118 BED a, nullBed; | |
| 119 BedLineStatus bedStatus; | |
| 120 | |
| 121 // process each entry in the current anno file | |
| 122 while ((bedStatus = anno->GetNextBed(a, lineNum)) != BED_INVALID) { | |
| 123 if (bedStatus == BED_VALID) { | |
| 124 _bed->countListHits(a, annoIndex, _sameStrand, _diffStrand); | |
| 125 a = nullBed; | |
| 126 } | |
| 127 } | |
| 128 } | |
| 129 | |
| 130 // report the annotations of the main file from the anno file. | |
| 131 ReportAnnotations(); | |
| 132 // close the annotations files; | |
| 133 CloseAnnoFiles(); | |
| 134 } | |
| 135 | |
| 136 | |
| 137 void BedAnnotate::ReportAnnotations() { | |
| 138 | |
| 139 if (_annoTitles.size() > 0) { | |
| 140 PrintHeader(); | |
| 141 } | |
| 142 | |
| 143 // process each chromosome | |
| 144 masterBedCovListMap::const_iterator chromItr = _bed->bedCovListMap.begin(); | |
| 145 masterBedCovListMap::const_iterator chromEnd = _bed->bedCovListMap.end(); | |
| 146 for (; chromItr != chromEnd; ++chromItr) { | |
| 147 // for each chrom, process each bin | |
| 148 binsToBedCovLists::const_iterator binItr = chromItr->second.begin(); | |
| 149 binsToBedCovLists::const_iterator binEnd = chromItr->second.end(); | |
| 150 for (; binItr != binEnd; ++binItr) { | |
| 151 // for each chrom & bin, compute and report | |
| 152 // the observed coverage for each feature | |
| 153 vector<BEDCOVLIST>::const_iterator bedItr = binItr->second.begin(); | |
| 154 vector<BEDCOVLIST>::const_iterator bedEnd = binItr->second.end(); | |
| 155 for (; bedItr != bedEnd; ++bedItr) { | |
| 156 // print the main BED entry. | |
| 157 _bed->reportBedTab(*bedItr); | |
| 158 | |
| 159 // now report the coverage from each annotation file. | |
| 160 for (size_t i = 0; i < _annoFiles.size(); ++i) { | |
| 161 unsigned int totalLength = 0; | |
| 162 int zeroDepthCount = 0; // number of bases with zero depth | |
| 163 int depth = 0; // tracks the depth at the current base | |
| 164 | |
| 165 // the start is either the first base in the feature OR | |
| 166 // the leftmost position of an overlapping feature. e.g. (s = start): | |
| 167 // A ---------- | |
| 168 // B s ------------ | |
| 169 int start = min(bedItr->minOverlapStarts[i], bedItr->start); | |
| 170 | |
| 171 map<unsigned int, DEPTH>::const_iterator depthItr; | |
| 172 map<unsigned int, DEPTH>::const_iterator depthEnd; | |
| 173 | |
| 174 // compute the coverage observed at each base in the feature marching from start to end. | |
| 175 for (CHRPOS pos = start+1; pos <= bedItr->end; pos++) { | |
| 176 // map pointer grabbing the starts and ends observed at this position | |
| 177 depthItr = bedItr->depthMapList[i].find(pos); | |
| 178 depthEnd = bedItr->depthMapList[i].end(); | |
| 179 | |
| 180 // increment coverage if starts observed at this position. | |
| 181 if (depthItr != depthEnd) | |
| 182 depth += depthItr->second.starts; | |
| 183 // update zero depth | |
| 184 if ((pos > bedItr->start) && (pos <= bedItr->end) && (depth == 0)) | |
| 185 zeroDepthCount++; | |
| 186 // decrement coverage if ends observed at this position. | |
| 187 if (depthItr != depthEnd) | |
| 188 depth = depth - depthItr->second.ends; | |
| 189 } | |
| 190 // Summarize the coverage for the current interval, | |
| 191 CHRPOS length = bedItr->end - bedItr->start; | |
| 192 totalLength += length; | |
| 193 int nonZeroBases = (length - zeroDepthCount); | |
| 194 float fractCovered = (float) nonZeroBases / length; | |
| 195 if (_reportCounts == false && _reportBoth == false) | |
| 196 printf("%f\t", fractCovered); | |
| 197 else if (_reportCounts == true && _reportBoth == false) | |
| 198 printf("%d\t", bedItr->counts[i]); | |
| 199 else if (_reportCounts == false && _reportBoth == true) | |
| 200 printf("%d\t%f\t", bedItr->counts[i], fractCovered); | |
| 201 } | |
| 202 // print newline for next feature. | |
| 203 printf("\n"); | |
| 204 } | |
| 205 } | |
| 206 } | |
| 207 } | |
| 208 | |
| 209 |
