diff BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.cpp @ 1:bec36315bd12 default tip

Deleted selected files
author aaronquinlan
date Sat, 19 Nov 2011 14:17:03 -0500
parents dfcd8b6c1bda
children
line wrap: on
line diff
--- a/BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.cpp	Thu Nov 03 10:25:04 2011 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,396 +0,0 @@
-/*****************************************************************************
-genomeCoverage.cpp
-
-(c) 2009 - Aaron Quinlan
-Hall Laboratory
-Department of Biochemistry and Molecular Genetics
-University of Virginia
-aaronquinlan@gmail.com
-
-Licenced under the GNU General Public License 2.0 license.
-******************************************************************************/
-#include "lineFileUtilities.h"
-#include "genomeCoverageBed.h"
-
-
-BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile,
-                                     bool eachBase, bool startSites, 
-                                     bool bedGraph, bool bedGraphAll,
-                                     int max, float scale,
-                                     bool bamInput, bool obeySplits,
-                                     bool filterByStrand, string requestedStrand,
-                                     bool only_5p_end, bool only_3p_end,
-                                     bool eachBaseZeroBased,
-                                     bool add_gb_track_line, string gb_track_line_opts) {
-
-    _bedFile = bedFile;
-    _genomeFile = genomeFile;
-    _eachBase = eachBase;
-    _eachBaseZeroBased = eachBaseZeroBased;
-    _startSites = startSites;
-    _bedGraph = bedGraph;
-    _bedGraphAll = bedGraphAll;
-    _max = max;
-    _scale = scale;
-    _bamInput = bamInput;
-    _obeySplits = obeySplits;
-    _filterByStrand = filterByStrand;
-    _requestedStrand = requestedStrand;
-    _only_3p_end = only_3p_end;
-    _only_5p_end = only_5p_end;
-    _add_gb_track_line = add_gb_track_line;
-    _gb_track_line_opts = gb_track_line_opts;
-    _currChromName = "";
-    _currChromSize = 0 ;
-
-    
-    if (_bamInput == false) {
-        _genome = new GenomeFile(genomeFile);
-    }
-    
-    PrintTrackDefinitionLine();
-
-    if (_bamInput == false) {
-        _bed = new BedFile(bedFile);
-        CoverageBed();
-    }
-    else {
-        CoverageBam(_bedFile);
-    }
-}
-
-void BedGenomeCoverage::PrintTrackDefinitionLine()
-{
-    //Print Track Definition line (if requested)
-    if ( (_bedGraph||_bedGraphAll) && _add_gb_track_line) {
-        string line = "track type=bedGraph";
-        if (!_gb_track_line_opts.empty()) {
-            line += " " ;
-            line += _gb_track_line_opts ;
-        }
-        cout << line << endl;
-    }
-
-}
-
-
-BedGenomeCoverage::~BedGenomeCoverage(void) {
-    delete _bed;
-    delete _genome;
-}
-
-
-void BedGenomeCoverage::ResetChromCoverage() {
-    _currChromName = "";
-    _currChromSize = 0 ;
-    std::vector<DEPTH>().swap(_currChromCoverage);
-}
-
-
-void BedGenomeCoverage::StartNewChrom(const string& newChrom) {
-    // If we've moved beyond the first encountered chromosomes,
-    // process the results of the previous chromosome.
-    if (_currChromName.length() > 0) {
-        ReportChromCoverage(_currChromCoverage, _currChromSize,
-                _currChromName, _currChromDepthHist);
-    }
-
-    // empty the previous chromosome and reserve new
-    std::vector<DEPTH>().swap(_currChromCoverage);
-
-    if (_visitedChromosomes.find(newChrom) != _visitedChromosomes.end()) {
-        cerr << "Input error: Chromosome " << _currChromName
-             << " found in non-sequential lines. This suggests that the input file is not sorted correctly." << endl;
-
-    }
-    _visitedChromosomes.insert(newChrom);
-
-    _currChromName = newChrom;
-
-    // get the current chrom size and allocate space
-    _currChromSize = _genome->getChromSize(_currChromName);
-
-    if (_currChromSize >= 0)
-        _currChromCoverage.resize(_currChromSize);
-    else {
-        cerr << "Input error: Chromosome " << _currChromName << " found in your input file but not in your genome file." << endl;
-        exit(1);
-    }
-}
-
-
-void BedGenomeCoverage::AddCoverage(int start, int end) {
-    // process the first line for this chromosome.
-    // make sure the coordinates fit within the chrom
-    if (start < _currChromSize)
-        _currChromCoverage[start].starts++;
-    if (end < _currChromSize)
-        _currChromCoverage[end].ends++;
-    else
-        _currChromCoverage[_currChromSize-1].ends++;
-}
-
-
-void BedGenomeCoverage::AddBlockedCoverage(const vector<BED> &bedBlocks) {
-    vector<BED>::const_iterator bedItr = bedBlocks.begin();
-    vector<BED>::const_iterator bedEnd = bedBlocks.end();
-    for (; bedItr != bedEnd; ++bedItr) {
-        // the end - 1 must be done because BamAncillary::getBamBlocks
-        // returns ends uncorrected for the genomeCoverageBed data structure.
-        // ugly, but necessary.
-        AddCoverage(bedItr->start, bedItr->end - 1);
-    }
-}
-
-
-void BedGenomeCoverage::CoverageBed() {
-
-    BED a, nullBed;
-    int lineNum = 0; // current input line number
-    BedLineStatus bedStatus;
-
-    ResetChromCoverage();
-
-    _bed->Open();
-    while ( (bedStatus = _bed->GetNextBed(a, lineNum)) != BED_INVALID ) {
-        if (bedStatus == BED_VALID) {
-            if (_filterByStrand == true) {
-                if (a.strand.empty()) {
-                    cerr << "Input error: Interval is missing a strand value on line " << lineNum << "." <<endl;
-                    exit(1);
-                }
-                if ( ! (a.strand == "-" || a.strand == "+") ) {
-                    cerr << "Input error: Invalid strand value (" << a.strand << ") on line " << lineNum << "." << endl;
-                    exit(1);
-                }
-                // skip if the strand is not what the user requested.
-                if (a.strand != _requestedStrand)
-                    continue;
-            }
-
-            // are we on a new chromosome?
-            if (a.chrom != _currChromName)
-                StartNewChrom(a.chrom);
-
-            if (_obeySplits == true) {
-                bedVector bedBlocks; // vec to store the discrete BED "blocks"
-                splitBedIntoBlocks(a, lineNum, bedBlocks);
-                AddBlockedCoverage(bedBlocks);
-            }
-            else if (_only_5p_end) {
-                int pos = ( a.strand=="+" ) ? a.start : a.end-1;
-                AddCoverage(pos,pos);
-            }
-            else if (_only_3p_end) {
-                int pos = ( a.strand=="-" ) ? a.start : a.end-1;
-                AddCoverage(pos,pos);
-            }
-            else
-                AddCoverage(a.start, a.end-1);
-        }
-    }
-    _bed->Close();
-    PrintFinalCoverage();
-}
-
-
-void BedGenomeCoverage::PrintFinalCoverage()
-{
-
-
-    // process the results of the last chromosome.
-    ReportChromCoverage(_currChromCoverage, _currChromSize,
-            _currChromName, _currChromDepthHist);
-    if (_eachBase == false && _bedGraph == false && _bedGraphAll == false) {
-        ReportGenomeCoverage(_currChromDepthHist);
-    }
-}
-
-
-void BedGenomeCoverage::CoverageBam(string bamFile) {
-
-    ResetChromCoverage();
-
-    // open the BAM file
-    BamReader reader;
-    reader.Open(bamFile);
-
-    // get header & reference information
-    string header = reader.GetHeaderText();
-    RefVector refs = reader.GetReferenceData();
-
-    // load the BAM header references into a BEDTools "genome file"
-    _genome = new GenomeFile(refs);
-    // convert each aligned BAM entry to BED
-    // and compute coverage on B
-    BamAlignment bam;
-    while (reader.GetNextAlignment(bam)) {
-        // skip if the read is unaligned
-        if (bam.IsMapped() == false)
-            continue;
-
-        // skip if we care about strands and the strand isn't what
-        // the user wanted
-        if ( (_filterByStrand == true) &&
-             ((_requestedStrand == "-") != bam.IsReverseStrand()) )
-            continue;
-
-        // extract the chrom, start and end from the BAM alignment
-        string chrom(refs.at(bam.RefID).RefName);
-        CHRPOS start = bam.Position;
-        CHRPOS end = bam.GetEndPosition(false, false) - 1;
-        
-        // are we on a new chromosome?
-        if ( chrom != _currChromName )
-            StartNewChrom(chrom);
-
-        // add coverage accordingly.
-        if (_obeySplits) {
-            bedVector bedBlocks;
-            // since we are counting coverage, we do want to split blocks when a
-            // deletion (D) CIGAR op is encountered (hence the true for the last parm)
-            getBamBlocks(bam, refs, bedBlocks, true);
-            AddBlockedCoverage(bedBlocks);
-        }
-        else if (_only_5p_end) {
-            int pos = ( !bam.IsReverseStrand() ) ? start : end;
-            AddCoverage(pos,pos);
-        }
-        else if (_only_3p_end) {
-            int pos = ( bam.IsReverseStrand() ) ? start : end;
-            AddCoverage(pos,pos);
-        }
-        else
-            AddCoverage(start, end);
-    }
-    // close the BAM
-    reader.Close();
-    PrintFinalCoverage();
-}
-
-
-void BedGenomeCoverage::ReportChromCoverage(const vector<DEPTH> &chromCov, const int &chromSize, const string &chrom, chromHistMap &chromDepthHist) {
-
-    if (_eachBase) {
-        int depth = 0; // initialize the depth
-        int offset = (_eachBaseZeroBased)?0:1;
-        for (int pos = 0; pos < chromSize; pos++) {
-
-            depth += chromCov[pos].starts;
-            // report the depth for this position.
-            if (depth>0 || !_eachBaseZeroBased)
-                cout << chrom << "\t" << pos+offset << "\t" << depth * _scale << endl;
-            depth = depth - chromCov[pos].ends;
-        }
-    }
-    else if (_bedGraph == true || _bedGraphAll == true) {
-        ReportChromCoverageBedGraph(chromCov, chromSize, chrom);
-    }
-    else {
-
-        int depth = 0; // initialize the depth
-
-        for (int pos = 0; pos < chromSize; pos++) {
-
-            depth += chromCov[pos].starts;
-
-            // add the depth at this position to the depth histogram
-            // for this chromosome. if the depth is greater than the
-            // maximum bin requested, then readjust the depth to be the max
-            if (depth >= _max) {
-                chromDepthHist[chrom][_max]++;
-            }
-            else {
-                chromDepthHist[chrom][depth]++;
-            }
-            depth = depth - chromCov[pos].ends;
-        }
-        // report the histogram for each chromosome
-        histMap::const_iterator depthIt = chromDepthHist[chrom].begin();
-        histMap::const_iterator depthEnd = chromDepthHist[chrom].end();
-        for (; depthIt != depthEnd; ++depthIt) {
-            int depth = depthIt->first;
-            unsigned int numBasesAtDepth = depthIt->second;
-            cout << chrom << "\t" << depth << "\t" << numBasesAtDepth << "\t"
-                << chromSize << "\t" << (float) ((float)numBasesAtDepth / (float)chromSize) << endl;
-        }
-    }
-}
-
-
-
-void BedGenomeCoverage::ReportGenomeCoverage(chromHistMap &chromDepthHist) {
-
-    // get the list of chromosome names in the genome
-    vector<string> chromList = _genome->getChromList();
-
-    unsigned int genomeSize = 0;
-    vector<string>::const_iterator chromItr = chromList.begin();
-    vector<string>::const_iterator chromEnd = chromList.end();
-    for (; chromItr != chromEnd; ++chromItr) {
-        string chrom = *chromItr;
-        genomeSize += _genome->getChromSize(chrom);
-        // if there were no reads for a give chromosome, then
-        // add the length of the chrom to the 0 bin.
-        if ( chromDepthHist.find(chrom) == chromDepthHist.end() ) {
-            chromDepthHist[chrom][0] += _genome->getChromSize(chrom);
-        }
-    }
-
-    histMap genomeHist; // depth histogram for the entire genome
-
-    // loop through each chromosome and add the depth and number of bases at each depth
-    // to the aggregate histogram for the entire genome
-    for (chromHistMap::iterator chromIt = chromDepthHist.begin(); chromIt != chromDepthHist.end(); ++chromIt) {
-        string chrom = chromIt->first;
-        for (histMap::iterator depthIt = chromDepthHist[chrom].begin(); depthIt != chromDepthHist[chrom].end(); ++depthIt) {
-            int depth = depthIt->first;
-            unsigned int numBasesAtDepth = depthIt->second;
-            genomeHist[depth] += numBasesAtDepth;
-        }
-    }
-
-    // loop through the depths for the entire genome
-    // and report the number and fraction of bases in
-    // the entire genome that are at said depth.
-    for (histMap::iterator genomeDepthIt = genomeHist.begin(); genomeDepthIt != genomeHist.end(); ++genomeDepthIt) {
-        int depth = genomeDepthIt->first;
-        unsigned int numBasesAtDepth = genomeDepthIt->second;
-
-        cout << "genome" << "\t" << depth << "\t" << numBasesAtDepth << "\t"
-            << genomeSize << "\t" << (float) ((float)numBasesAtDepth / (float)genomeSize) << endl;
-    }
-}
-
-
-void BedGenomeCoverage::ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, const int &chromSize, const string &chrom) {
-
-    int depth = 0; // initialize the depth
-    int lastStart = -1;
-    int lastDepth = -1;
-
-    for (int pos = 0; pos < chromSize; pos++) {
-        depth += chromCov[pos].starts;
-
-        if (depth != lastDepth) {
-            // Coverage depth has changed, print the last interval coverage (if any)
-            // Print if:
-            // (1) depth>0 (the default running mode),
-            // (2) depth==0 and the user requested to print zero covered regions (_bedGraphAll)
-            if ( (lastDepth != -1) && (lastDepth > 0 || _bedGraphAll) ) {
-                cout << chrom << "\t" << lastStart << "\t" << pos << "\t" << lastDepth * _scale << endl;
-            }
-            //Set current position as the new interval start + depth
-            lastDepth = depth;
-            lastStart = pos;
-        }
-        // Default: the depth has not changed, so we will not print anything.
-        // Proceed until the depth changes.
-        // Update depth
-        depth = depth - chromCov[pos].ends;
-    }
-    //Print information about the last position
-    if ( (lastDepth != -1) && (lastDepth > 0 || _bedGraphAll) ) {
-        cout << chrom << "\t" << lastStart << "\t" << chromSize << "\t" << lastDepth * _scale << endl;
-    }
-}