Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/coverageBed/coverageBed.cpp @ 1:bec36315bd12 default tip
Deleted selected files
author | aaronquinlan |
---|---|
date | Sat, 19 Nov 2011 14:17:03 -0500 |
parents | dfcd8b6c1bda |
children |
line wrap: on
line diff
--- a/BEDTools-Version-2.14.3/src/coverageBed/coverageBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,293 +0,0 @@ -/***************************************************************************** - coverageBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "coverageBed.h" - -// build -BedCoverage::BedCoverage(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand, - bool writeHistogram, bool bamInput, bool obeySplits, - bool eachBase, bool countsOnly) { - - _bedAFile = bedAFile; - _bedBFile = bedBFile; - - _bedA = new BedFile(bedAFile); - _bedB = new BedFile(bedBFile); - - _sameStrand = sameStrand; - _diffStrand = diffStrand; - _obeySplits = obeySplits; - _eachBase = eachBase; - _writeHistogram = writeHistogram; - _bamInput = bamInput; - _countsOnly = countsOnly; - - - if (_bamInput == false) - CollectCoverageBed(); - else - CollectCoverageBam(_bedA->bedFile); -} - -// destroy -BedCoverage::~BedCoverage(void) { - delete _bedA; - delete _bedB; -} - - -void BedCoverage::CollectCoverageBed() { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bedB->loadBedCovFileIntoMap(); - - int lineNum = 0; // current input line number - BED a, nullBed; - BedLineStatus bedStatus; - - _bedA->Open(); - // process each entry in A - while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { - // process the BED entry as a single block - if (_obeySplits == false) - _bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly); - // split the BED into discrete blocksand process each independently. - else { - bedVector bedBlocks; - splitBedIntoBlocks(a, lineNum, bedBlocks); - - // use countSplitHits to avoid over-counting each split chunk - // as distinct read coverage. - _bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly); - } - a = nullBed; - } - } - _bedA->Close(); - - // report the coverage (summary or histogram) for BED B. - if (_countsOnly == true) - ReportCounts(); - else - ReportCoverage(); -} - - -void BedCoverage::CollectCoverageBam(string bamFile) { - - // load the "B" bed file into a map so - // that we can easily compare "A" to it for overlaps - _bedB->loadBedCovFileIntoMap(); - - // open the BAM file - BamReader reader; - reader.Open(bamFile); - - // get header & reference information - string header = reader.GetHeaderText(); - RefVector refs = reader.GetReferenceData(); - - // convert each aligned BAM entry to BED - // and compute coverage on B - BamAlignment bam; - while (reader.GetNextAlignment(bam)) { - if (bam.IsMapped()) { - // treat the BAM alignment as a single "block" - if (_obeySplits == false) { - // construct a new BED entry from the current BAM alignment. - BED a; - a.chrom = refs.at(bam.RefID).RefName; - a.start = bam.Position; - a.end = bam.GetEndPosition(false, false); - a.strand = "+"; - if (bam.IsReverseStrand()) a.strand = "-"; - - _bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly); - } - // split the BAM alignment into discrete blocks and - // look for overlaps only within each block. - else { - // vec to store the discrete BED "blocks" from a - bedVector bedBlocks; - // since we are counting coverage, we do want to split blocks when a - // deletion (D) CIGAR op is encountered (hence the true for the last parm) - getBamBlocks(bam, refs, bedBlocks, true); - // use countSplitHits to avoid over-counting each split chunk - // as distinct read coverage. - _bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly); - } - } - } - // report the coverage (summary or histogram) for BED B. - if (_countsOnly == true) - ReportCounts(); - else - ReportCoverage(); - // close the BAM file - reader.Close(); -} - - -void BedCoverage::ReportCounts() { - - - // process each chromosome - masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin(); - masterBedCovMap::const_iterator chromEnd = _bedB->bedCovMap.end(); - for (; chromItr != chromEnd; ++chromItr) - { - // for each chrom, process each bin - binsToBedCovs::const_iterator binItr = chromItr->second.begin(); - binsToBedCovs::const_iterator binEnd = chromItr->second.end(); - for (; binItr != binEnd; ++binItr) - { - // for each chrom & bin, compute and report - // the observed coverage for each feature - vector<BEDCOV>::const_iterator bedItr = binItr->second.begin(); - vector<BEDCOV>::const_iterator bedEnd = binItr->second.end(); - for (; bedItr != bedEnd; ++bedItr) - { - _bedB->reportBedTab(*bedItr); - printf("%d\n", bedItr->count); - } - } - } -} - -void BedCoverage::ReportCoverage() { - - map<unsigned int, unsigned int> allDepthHist; - unsigned int totalLength = 0; - - // process each chromosome - masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin(); - masterBedCovMap::const_iterator chromEnd = _bedB->bedCovMap.end(); - for (; chromItr != chromEnd; ++chromItr) - { - // for each chrom, process each bin - binsToBedCovs::const_iterator binItr = chromItr->second.begin(); - binsToBedCovs::const_iterator binEnd = chromItr->second.end(); - for (; binItr != binEnd; ++binItr) - { - // for each chrom & bin, compute and report - // the observed coverage for each feature - vector<BEDCOV>::const_iterator bedItr = binItr->second.begin(); - vector<BEDCOV>::const_iterator bedEnd = binItr->second.end(); - for (; bedItr != bedEnd; ++bedItr) - { - int zeroDepthCount = 0; // number of bases with zero depth - int depth = 0; // tracks the depth at the current base - - // the start is either the first base in the feature OR - // the leftmost position of an overlapping feature. e.g. (s = start): - // A ---------- - // B s ------------ - int start = min(bedItr->minOverlapStart, bedItr->start); - - // track the number of bases in the feature covered by - // 0, 1, 2, ... n features in A - map<unsigned int, unsigned int> depthHist; - map<unsigned int, DEPTH>::const_iterator depthItr; - - // compute the coverage observed at each base in the feature marching from start to end. - for (CHRPOS pos = start+1; pos <= bedItr->end; pos++) - { - // map pointer grabbing the starts and ends observed at this position - depthItr = bedItr->depthMap.find(pos); - // increment coverage if starts observed at this position. - if (depthItr != bedItr->depthMap.end()) - depth += depthItr->second.starts; - // update coverage assuming the current position is within the current B feature - if ((pos > bedItr->start) && (pos <= bedItr->end)) { - if (depth == 0) zeroDepthCount++; - // update our histograms, assuming we are not reporting "per-base" coverage. - if (_eachBase == false) { - depthHist[depth]++; - allDepthHist[depth]++; - } - else if ((_eachBase == true) && (bedItr->zeroLength == false)) - { - _bedB->reportBedTab(*bedItr); - printf("%d\t%d\n", pos-bedItr->start, depth); - } - } - // decrement coverage if ends observed at this position. - if (depthItr != bedItr->depthMap.end()) - depth = depth - depthItr->second.ends; - } - - // handle the special case where the user wants "per-base" depth - // but the current feature is length = 0. - if ((_eachBase == true) && (bedItr->zeroLength == true)) { - _bedB->reportBedTab(*bedItr); - printf("1\t%d\n",depth); - } - // Summarize the coverage for the current interval, - // assuming the user has not requested "per-base" coverage. - else if (_eachBase == false) - { - CHRPOS length = bedItr->end - bedItr->start; - if (bedItr->zeroLength == true) { - length = 0; - } - totalLength += length; - int nonZeroBases = (length - zeroDepthCount); - - float fractCovered = 0.0; - if (bedItr->zeroLength == false) { - fractCovered = (float) nonZeroBases / length; - } - - // print a summary of the coverage - if (_writeHistogram == false) { - _bedB->reportBedTab(*bedItr); - printf("%d\t%d\t%d\t%0.7f\n", bedItr->count, nonZeroBases, length, fractCovered); - } - // HISTOGRAM - // report the number of bases with coverage == x - else { - // produce a histogram when not a zero length feature. - if (bedItr->zeroLength == false) { - map<unsigned int, unsigned int>::const_iterator histItr = depthHist.begin(); - map<unsigned int, unsigned int>::const_iterator histEnd = depthHist.end(); - for (; histItr != histEnd; ++histItr) - { - float fractAtThisDepth = (float) histItr->second / length; - _bedB->reportBedTab(*bedItr); - printf("%d\t%d\t%d\t%0.7f\n", histItr->first, histItr->second, length, fractAtThisDepth); - } - } - // special case when it is a zero length feauture. - else { - _bedB->reportBedTab(*bedItr); - printf("%d\t%d\t%d\t%0.7f\n", bedItr->count, 0, 0, 1.0000000); - } - } - } - } - } - } - // report a histogram of coverage among _all_ - // features in B. - if (_writeHistogram == true) { - map<unsigned int, unsigned int>::const_iterator histItr = allDepthHist.begin(); - map<unsigned int, unsigned int>::const_iterator histEnd = allDepthHist.end(); - for (; histItr != histEnd; ++histItr) { - float fractAtThisDepth = (float) histItr->second / totalLength; - printf("all\t%d\t%d\t%d\t%0.7f\n", histItr->first, histItr->second, totalLength, fractAtThisDepth); - } - } -} - -