Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.cpp Thu Nov 03 10:25:04 2011 -0400 @@ -0,0 +1,134 @@ +/***************************************************************************** + multiBamCov.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "multiBamCov.h" +#include "api/BamMultiReader.h" + + +/* + Constructor +*/ +MultiCovBam::MultiCovBam(const vector<string> &bam_files, const string bed_file, + int minQual, bool properOnly, + bool keepDuplicates, bool keepFailedQC) +: +_bam_files(bam_files), +_bed_file(bed_file), +_minQual(minQual), +_properOnly(properOnly), +_keepDuplicates(keepDuplicates), +_keepFailedQC(keepFailedQC) +{ + _bed = new BedFile(_bed_file); + LoadBamFileMap(); +} + + +/* + Destructor +*/ +MultiCovBam::~MultiCovBam(void) +{} + + + +void MultiCovBam::CollectCoverage() +{ + BamMultiReader reader; + + if ( !reader.Open(_bam_files) ) + { + cerr << "Could not open input BAM files." << endl; + exit(1); + } + else + { + // attempt to find index files + reader.LocateIndexes(); + + // if index data available for all BAM files, we can use SetRegion + if ( reader.HasIndexes() ) { + BED bed, nullBed; + int lineNum = 0; + BedLineStatus bedStatus; + + _bed->Open(); + // loop through each BED entry, jump to it, + // and collect coverage from each BAM + while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) + { + if (bedStatus == BED_VALID) + { + // initialize counts for each file to 0 + vector<int> counts(_bam_files.size(), 0); + // get the BAM refId for this chrom. + int refId = reader.GetReferenceID(bed.chrom); + // set up a BamRegion to which to attempt to jump + BamRegion region(refId, (int)bed.start, refId, (int)bed.end); + + // everything checks out, just iterate through specified region, counting alignments + if ( (refId != -1) && (reader.SetRegion(region)) ) { + BamAlignment al; + while ( reader.GetNextAlignment(al) ) + { + bool duplicate = al.IsDuplicate(); + bool failedQC = al.IsFailedQC(); + if (_keepDuplicates) duplicate = false; + if (_keepFailedQC) failedQC = false; + // map qual must exceed minimum + if ((al.MapQuality >= _minQual) && (!duplicate) && (!failedQC)) { + // ignore if not properly paired and we actually care. + if (_properOnly && !al.IsProperPair()) + continue; + + // lookup the offset of the file name and tabulate + //coverage for the appropriate file + counts[bamFileMap[al.Filename]]++; + } + } + } + // report the cov at this interval for each file and reset + _bed->reportBedTab(bed); + ReportCounts(counts); + bed = nullBed; + } + } + _bed->Close(); + } + else { + cerr << "Could not find indexes." << endl; + reader.Close(); + exit(1); + } + } +} + + +void MultiCovBam::LoadBamFileMap(void) +{ + for (size_t i = 0; i < _bam_files.size(); ++i) + { + bamFileMap[_bam_files[i]] = i; + } +} + +void MultiCovBam::ReportCounts(const vector<int> &counts) +{ + for (size_t i = 0; i < counts.size(); ++i) + { + if (i < counts.size() - 1) + cout << counts[i] << "\t"; + else + cout << counts[i]; + } + cout << endl; +}