comparison BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfcd8b6c1bda
1 /*****************************************************************************
2 multiBamCov.cpp
3
4 (c) 2009 - Aaron Quinlan
5 Hall Laboratory
6 Department of Biochemistry and Molecular Genetics
7 University of Virginia
8 aaronquinlan@gmail.com
9
10 Licenced under the GNU General Public License 2.0 license.
11 ******************************************************************************/
12 #include "lineFileUtilities.h"
13 #include "multiBamCov.h"
14 #include "api/BamMultiReader.h"
15
16
17 /*
18 Constructor
19 */
20 MultiCovBam::MultiCovBam(const vector<string> &bam_files, const string bed_file,
21 int minQual, bool properOnly,
22 bool keepDuplicates, bool keepFailedQC)
23 :
24 _bam_files(bam_files),
25 _bed_file(bed_file),
26 _minQual(minQual),
27 _properOnly(properOnly),
28 _keepDuplicates(keepDuplicates),
29 _keepFailedQC(keepFailedQC)
30 {
31 _bed = new BedFile(_bed_file);
32 LoadBamFileMap();
33 }
34
35
36 /*
37 Destructor
38 */
39 MultiCovBam::~MultiCovBam(void)
40 {}
41
42
43
44 void MultiCovBam::CollectCoverage()
45 {
46 BamMultiReader reader;
47
48 if ( !reader.Open(_bam_files) )
49 {
50 cerr << "Could not open input BAM files." << endl;
51 exit(1);
52 }
53 else
54 {
55 // attempt to find index files
56 reader.LocateIndexes();
57
58 // if index data available for all BAM files, we can use SetRegion
59 if ( reader.HasIndexes() ) {
60 BED bed, nullBed;
61 int lineNum = 0;
62 BedLineStatus bedStatus;
63
64 _bed->Open();
65 // loop through each BED entry, jump to it,
66 // and collect coverage from each BAM
67 while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID)
68 {
69 if (bedStatus == BED_VALID)
70 {
71 // initialize counts for each file to 0
72 vector<int> counts(_bam_files.size(), 0);
73 // get the BAM refId for this chrom.
74 int refId = reader.GetReferenceID(bed.chrom);
75 // set up a BamRegion to which to attempt to jump
76 BamRegion region(refId, (int)bed.start, refId, (int)bed.end);
77
78 // everything checks out, just iterate through specified region, counting alignments
79 if ( (refId != -1) && (reader.SetRegion(region)) ) {
80 BamAlignment al;
81 while ( reader.GetNextAlignment(al) )
82 {
83 bool duplicate = al.IsDuplicate();
84 bool failedQC = al.IsFailedQC();
85 if (_keepDuplicates) duplicate = false;
86 if (_keepFailedQC) failedQC = false;
87 // map qual must exceed minimum
88 if ((al.MapQuality >= _minQual) && (!duplicate) && (!failedQC)) {
89 // ignore if not properly paired and we actually care.
90 if (_properOnly && !al.IsProperPair())
91 continue;
92
93 // lookup the offset of the file name and tabulate
94 //coverage for the appropriate file
95 counts[bamFileMap[al.Filename]]++;
96 }
97 }
98 }
99 // report the cov at this interval for each file and reset
100 _bed->reportBedTab(bed);
101 ReportCounts(counts);
102 bed = nullBed;
103 }
104 }
105 _bed->Close();
106 }
107 else {
108 cerr << "Could not find indexes." << endl;
109 reader.Close();
110 exit(1);
111 }
112 }
113 }
114
115
116 void MultiCovBam::LoadBamFileMap(void)
117 {
118 for (size_t i = 0; i < _bam_files.size(); ++i)
119 {
120 bamFileMap[_bam_files[i]] = i;
121 }
122 }
123
124 void MultiCovBam::ReportCounts(const vector<int> &counts)
125 {
126 for (size_t i = 0; i < counts.size(); ++i)
127 {
128 if (i < counts.size() - 1)
129 cout << counts[i] << "\t";
130 else
131 cout << counts[i];
132 }
133 cout << endl;
134 }