Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.cpp @ 1:bec36315bd12 default tip
Deleted selected files
author | aaronquinlan |
---|---|
date | Sat, 19 Nov 2011 14:17:03 -0500 |
parents | dfcd8b6c1bda |
children |
line wrap: on
line diff
--- a/BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,396 +0,0 @@ -/***************************************************************************** -genomeCoverage.cpp - -(c) 2009 - Aaron Quinlan -Hall Laboratory -Department of Biochemistry and Molecular Genetics -University of Virginia -aaronquinlan@gmail.com - -Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "genomeCoverageBed.h" - - -BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile, - bool eachBase, bool startSites, - bool bedGraph, bool bedGraphAll, - int max, float scale, - bool bamInput, bool obeySplits, - bool filterByStrand, string requestedStrand, - bool only_5p_end, bool only_3p_end, - bool eachBaseZeroBased, - bool add_gb_track_line, string gb_track_line_opts) { - - _bedFile = bedFile; - _genomeFile = genomeFile; - _eachBase = eachBase; - _eachBaseZeroBased = eachBaseZeroBased; - _startSites = startSites; - _bedGraph = bedGraph; - _bedGraphAll = bedGraphAll; - _max = max; - _scale = scale; - _bamInput = bamInput; - _obeySplits = obeySplits; - _filterByStrand = filterByStrand; - _requestedStrand = requestedStrand; - _only_3p_end = only_3p_end; - _only_5p_end = only_5p_end; - _add_gb_track_line = add_gb_track_line; - _gb_track_line_opts = gb_track_line_opts; - _currChromName = ""; - _currChromSize = 0 ; - - - if (_bamInput == false) { - _genome = new GenomeFile(genomeFile); - } - - PrintTrackDefinitionLine(); - - if (_bamInput == false) { - _bed = new BedFile(bedFile); - CoverageBed(); - } - else { - CoverageBam(_bedFile); - } -} - -void BedGenomeCoverage::PrintTrackDefinitionLine() -{ - //Print Track Definition line (if requested) - if ( (_bedGraph||_bedGraphAll) && _add_gb_track_line) { - string line = "track type=bedGraph"; - if (!_gb_track_line_opts.empty()) { - line += " " ; - line += _gb_track_line_opts ; - } - cout << line << endl; - } - -} - - -BedGenomeCoverage::~BedGenomeCoverage(void) { - delete _bed; - delete _genome; -} - - -void BedGenomeCoverage::ResetChromCoverage() { - _currChromName = ""; - _currChromSize = 0 ; - std::vector<DEPTH>().swap(_currChromCoverage); -} - - -void BedGenomeCoverage::StartNewChrom(const string& newChrom) { - // If we've moved beyond the first encountered chromosomes, - // process the results of the previous chromosome. - if (_currChromName.length() > 0) { - ReportChromCoverage(_currChromCoverage, _currChromSize, - _currChromName, _currChromDepthHist); - } - - // empty the previous chromosome and reserve new - std::vector<DEPTH>().swap(_currChromCoverage); - - if (_visitedChromosomes.find(newChrom) != _visitedChromosomes.end()) { - cerr << "Input error: Chromosome " << _currChromName - << " found in non-sequential lines. This suggests that the input file is not sorted correctly." << endl; - - } - _visitedChromosomes.insert(newChrom); - - _currChromName = newChrom; - - // get the current chrom size and allocate space - _currChromSize = _genome->getChromSize(_currChromName); - - if (_currChromSize >= 0) - _currChromCoverage.resize(_currChromSize); - else { - cerr << "Input error: Chromosome " << _currChromName << " found in your input file but not in your genome file." << endl; - exit(1); - } -} - - -void BedGenomeCoverage::AddCoverage(int start, int end) { - // process the first line for this chromosome. - // make sure the coordinates fit within the chrom - if (start < _currChromSize) - _currChromCoverage[start].starts++; - if (end < _currChromSize) - _currChromCoverage[end].ends++; - else - _currChromCoverage[_currChromSize-1].ends++; -} - - -void BedGenomeCoverage::AddBlockedCoverage(const vector<BED> &bedBlocks) { - vector<BED>::const_iterator bedItr = bedBlocks.begin(); - vector<BED>::const_iterator bedEnd = bedBlocks.end(); - for (; bedItr != bedEnd; ++bedItr) { - // the end - 1 must be done because BamAncillary::getBamBlocks - // returns ends uncorrected for the genomeCoverageBed data structure. - // ugly, but necessary. - AddCoverage(bedItr->start, bedItr->end - 1); - } -} - - -void BedGenomeCoverage::CoverageBed() { - - BED a, nullBed; - int lineNum = 0; // current input line number - BedLineStatus bedStatus; - - ResetChromCoverage(); - - _bed->Open(); - while ( (bedStatus = _bed->GetNextBed(a, lineNum)) != BED_INVALID ) { - if (bedStatus == BED_VALID) { - if (_filterByStrand == true) { - if (a.strand.empty()) { - cerr << "Input error: Interval is missing a strand value on line " << lineNum << "." <<endl; - exit(1); - } - if ( ! (a.strand == "-" || a.strand == "+") ) { - cerr << "Input error: Invalid strand value (" << a.strand << ") on line " << lineNum << "." << endl; - exit(1); - } - // skip if the strand is not what the user requested. - if (a.strand != _requestedStrand) - continue; - } - - // are we on a new chromosome? - if (a.chrom != _currChromName) - StartNewChrom(a.chrom); - - if (_obeySplits == true) { - bedVector bedBlocks; // vec to store the discrete BED "blocks" - splitBedIntoBlocks(a, lineNum, bedBlocks); - AddBlockedCoverage(bedBlocks); - } - else if (_only_5p_end) { - int pos = ( a.strand=="+" ) ? a.start : a.end-1; - AddCoverage(pos,pos); - } - else if (_only_3p_end) { - int pos = ( a.strand=="-" ) ? a.start : a.end-1; - AddCoverage(pos,pos); - } - else - AddCoverage(a.start, a.end-1); - } - } - _bed->Close(); - PrintFinalCoverage(); -} - - -void BedGenomeCoverage::PrintFinalCoverage() -{ - - - // process the results of the last chromosome. - ReportChromCoverage(_currChromCoverage, _currChromSize, - _currChromName, _currChromDepthHist); - if (_eachBase == false && _bedGraph == false && _bedGraphAll == false) { - ReportGenomeCoverage(_currChromDepthHist); - } -} - - -void BedGenomeCoverage::CoverageBam(string bamFile) { - - ResetChromCoverage(); - - // open the BAM file - BamReader reader; - reader.Open(bamFile); - - // get header & reference information - string header = reader.GetHeaderText(); - RefVector refs = reader.GetReferenceData(); - - // load the BAM header references into a BEDTools "genome file" - _genome = new GenomeFile(refs); - // convert each aligned BAM entry to BED - // and compute coverage on B - BamAlignment bam; - while (reader.GetNextAlignment(bam)) { - // skip if the read is unaligned - if (bam.IsMapped() == false) - continue; - - // skip if we care about strands and the strand isn't what - // the user wanted - if ( (_filterByStrand == true) && - ((_requestedStrand == "-") != bam.IsReverseStrand()) ) - continue; - - // extract the chrom, start and end from the BAM alignment - string chrom(refs.at(bam.RefID).RefName); - CHRPOS start = bam.Position; - CHRPOS end = bam.GetEndPosition(false, false) - 1; - - // are we on a new chromosome? - if ( chrom != _currChromName ) - StartNewChrom(chrom); - - // add coverage accordingly. - if (_obeySplits) { - bedVector bedBlocks; - // since we are counting coverage, we do want to split blocks when a - // deletion (D) CIGAR op is encountered (hence the true for the last parm) - getBamBlocks(bam, refs, bedBlocks, true); - AddBlockedCoverage(bedBlocks); - } - else if (_only_5p_end) { - int pos = ( !bam.IsReverseStrand() ) ? start : end; - AddCoverage(pos,pos); - } - else if (_only_3p_end) { - int pos = ( bam.IsReverseStrand() ) ? start : end; - AddCoverage(pos,pos); - } - else - AddCoverage(start, end); - } - // close the BAM - reader.Close(); - PrintFinalCoverage(); -} - - -void BedGenomeCoverage::ReportChromCoverage(const vector<DEPTH> &chromCov, const int &chromSize, const string &chrom, chromHistMap &chromDepthHist) { - - if (_eachBase) { - int depth = 0; // initialize the depth - int offset = (_eachBaseZeroBased)?0:1; - for (int pos = 0; pos < chromSize; pos++) { - - depth += chromCov[pos].starts; - // report the depth for this position. - if (depth>0 || !_eachBaseZeroBased) - cout << chrom << "\t" << pos+offset << "\t" << depth * _scale << endl; - depth = depth - chromCov[pos].ends; - } - } - else if (_bedGraph == true || _bedGraphAll == true) { - ReportChromCoverageBedGraph(chromCov, chromSize, chrom); - } - else { - - int depth = 0; // initialize the depth - - for (int pos = 0; pos < chromSize; pos++) { - - depth += chromCov[pos].starts; - - // add the depth at this position to the depth histogram - // for this chromosome. if the depth is greater than the - // maximum bin requested, then readjust the depth to be the max - if (depth >= _max) { - chromDepthHist[chrom][_max]++; - } - else { - chromDepthHist[chrom][depth]++; - } - depth = depth - chromCov[pos].ends; - } - // report the histogram for each chromosome - histMap::const_iterator depthIt = chromDepthHist[chrom].begin(); - histMap::const_iterator depthEnd = chromDepthHist[chrom].end(); - for (; depthIt != depthEnd; ++depthIt) { - int depth = depthIt->first; - unsigned int numBasesAtDepth = depthIt->second; - cout << chrom << "\t" << depth << "\t" << numBasesAtDepth << "\t" - << chromSize << "\t" << (float) ((float)numBasesAtDepth / (float)chromSize) << endl; - } - } -} - - - -void BedGenomeCoverage::ReportGenomeCoverage(chromHistMap &chromDepthHist) { - - // get the list of chromosome names in the genome - vector<string> chromList = _genome->getChromList(); - - unsigned int genomeSize = 0; - vector<string>::const_iterator chromItr = chromList.begin(); - vector<string>::const_iterator chromEnd = chromList.end(); - for (; chromItr != chromEnd; ++chromItr) { - string chrom = *chromItr; - genomeSize += _genome->getChromSize(chrom); - // if there were no reads for a give chromosome, then - // add the length of the chrom to the 0 bin. - if ( chromDepthHist.find(chrom) == chromDepthHist.end() ) { - chromDepthHist[chrom][0] += _genome->getChromSize(chrom); - } - } - - histMap genomeHist; // depth histogram for the entire genome - - // loop through each chromosome and add the depth and number of bases at each depth - // to the aggregate histogram for the entire genome - for (chromHistMap::iterator chromIt = chromDepthHist.begin(); chromIt != chromDepthHist.end(); ++chromIt) { - string chrom = chromIt->first; - for (histMap::iterator depthIt = chromDepthHist[chrom].begin(); depthIt != chromDepthHist[chrom].end(); ++depthIt) { - int depth = depthIt->first; - unsigned int numBasesAtDepth = depthIt->second; - genomeHist[depth] += numBasesAtDepth; - } - } - - // loop through the depths for the entire genome - // and report the number and fraction of bases in - // the entire genome that are at said depth. - for (histMap::iterator genomeDepthIt = genomeHist.begin(); genomeDepthIt != genomeHist.end(); ++genomeDepthIt) { - int depth = genomeDepthIt->first; - unsigned int numBasesAtDepth = genomeDepthIt->second; - - cout << "genome" << "\t" << depth << "\t" << numBasesAtDepth << "\t" - << genomeSize << "\t" << (float) ((float)numBasesAtDepth / (float)genomeSize) << endl; - } -} - - -void BedGenomeCoverage::ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, const int &chromSize, const string &chrom) { - - int depth = 0; // initialize the depth - int lastStart = -1; - int lastDepth = -1; - - for (int pos = 0; pos < chromSize; pos++) { - depth += chromCov[pos].starts; - - if (depth != lastDepth) { - // Coverage depth has changed, print the last interval coverage (if any) - // Print if: - // (1) depth>0 (the default running mode), - // (2) depth==0 and the user requested to print zero covered regions (_bedGraphAll) - if ( (lastDepth != -1) && (lastDepth > 0 || _bedGraphAll) ) { - cout << chrom << "\t" << lastStart << "\t" << pos << "\t" << lastDepth * _scale << endl; - } - //Set current position as the new interval start + depth - lastDepth = depth; - lastStart = pos; - } - // Default: the depth has not changed, so we will not print anything. - // Proceed until the depth changes. - // Update depth - depth = depth - chromCov[pos].ends; - } - //Print information about the last position - if ( (lastDepth != -1) && (lastDepth > 0 || _bedGraphAll) ) { - cout << chrom << "\t" << lastStart << "\t" << chromSize << "\t" << lastDepth * _scale << endl; - } -}