0
|
1 /*****************************************************************************
|
|
2 multiBamCov.cpp
|
|
3
|
|
4 (c) 2009 - Aaron Quinlan
|
|
5 Hall Laboratory
|
|
6 Department of Biochemistry and Molecular Genetics
|
|
7 University of Virginia
|
|
8 aaronquinlan@gmail.com
|
|
9
|
|
10 Licenced under the GNU General Public License 2.0 license.
|
|
11 ******************************************************************************/
|
|
12 #include "lineFileUtilities.h"
|
|
13 #include "multiBamCov.h"
|
|
14 #include "api/BamMultiReader.h"
|
|
15
|
|
16
|
|
17 /*
|
|
18 Constructor
|
|
19 */
|
|
20 MultiCovBam::MultiCovBam(const vector<string> &bam_files, const string bed_file,
|
|
21 int minQual, bool properOnly,
|
|
22 bool keepDuplicates, bool keepFailedQC)
|
|
23 :
|
|
24 _bam_files(bam_files),
|
|
25 _bed_file(bed_file),
|
|
26 _minQual(minQual),
|
|
27 _properOnly(properOnly),
|
|
28 _keepDuplicates(keepDuplicates),
|
|
29 _keepFailedQC(keepFailedQC)
|
|
30 {
|
|
31 _bed = new BedFile(_bed_file);
|
|
32 LoadBamFileMap();
|
|
33 }
|
|
34
|
|
35
|
|
36 /*
|
|
37 Destructor
|
|
38 */
|
|
39 MultiCovBam::~MultiCovBam(void)
|
|
40 {}
|
|
41
|
|
42
|
|
43
|
|
44 void MultiCovBam::CollectCoverage()
|
|
45 {
|
|
46 BamMultiReader reader;
|
|
47
|
|
48 if ( !reader.Open(_bam_files) )
|
|
49 {
|
|
50 cerr << "Could not open input BAM files." << endl;
|
|
51 exit(1);
|
|
52 }
|
|
53 else
|
|
54 {
|
|
55 // attempt to find index files
|
|
56 reader.LocateIndexes();
|
|
57
|
|
58 // if index data available for all BAM files, we can use SetRegion
|
|
59 if ( reader.HasIndexes() ) {
|
|
60 BED bed, nullBed;
|
|
61 int lineNum = 0;
|
|
62 BedLineStatus bedStatus;
|
|
63
|
|
64 _bed->Open();
|
|
65 // loop through each BED entry, jump to it,
|
|
66 // and collect coverage from each BAM
|
|
67 while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID)
|
|
68 {
|
|
69 if (bedStatus == BED_VALID)
|
|
70 {
|
|
71 // initialize counts for each file to 0
|
|
72 vector<int> counts(_bam_files.size(), 0);
|
|
73 // get the BAM refId for this chrom.
|
|
74 int refId = reader.GetReferenceID(bed.chrom);
|
|
75 // set up a BamRegion to which to attempt to jump
|
|
76 BamRegion region(refId, (int)bed.start, refId, (int)bed.end);
|
|
77
|
|
78 // everything checks out, just iterate through specified region, counting alignments
|
|
79 if ( (refId != -1) && (reader.SetRegion(region)) ) {
|
|
80 BamAlignment al;
|
|
81 while ( reader.GetNextAlignment(al) )
|
|
82 {
|
|
83 bool duplicate = al.IsDuplicate();
|
|
84 bool failedQC = al.IsFailedQC();
|
|
85 if (_keepDuplicates) duplicate = false;
|
|
86 if (_keepFailedQC) failedQC = false;
|
|
87 // map qual must exceed minimum
|
|
88 if ((al.MapQuality >= _minQual) && (!duplicate) && (!failedQC)) {
|
|
89 // ignore if not properly paired and we actually care.
|
|
90 if (_properOnly && !al.IsProperPair())
|
|
91 continue;
|
|
92
|
|
93 // lookup the offset of the file name and tabulate
|
|
94 //coverage for the appropriate file
|
|
95 counts[bamFileMap[al.Filename]]++;
|
|
96 }
|
|
97 }
|
|
98 }
|
|
99 // report the cov at this interval for each file and reset
|
|
100 _bed->reportBedTab(bed);
|
|
101 ReportCounts(counts);
|
|
102 bed = nullBed;
|
|
103 }
|
|
104 }
|
|
105 _bed->Close();
|
|
106 }
|
|
107 else {
|
|
108 cerr << "Could not find indexes." << endl;
|
|
109 reader.Close();
|
|
110 exit(1);
|
|
111 }
|
|
112 }
|
|
113 }
|
|
114
|
|
115
|
|
116 void MultiCovBam::LoadBamFileMap(void)
|
|
117 {
|
|
118 for (size_t i = 0; i < _bam_files.size(); ++i)
|
|
119 {
|
|
120 bamFileMap[_bam_files[i]] = i;
|
|
121 }
|
|
122 }
|
|
123
|
|
124 void MultiCovBam::ReportCounts(const vector<int> &counts)
|
|
125 {
|
|
126 for (size_t i = 0; i < counts.size(); ++i)
|
|
127 {
|
|
128 if (i < counts.size() - 1)
|
|
129 cout << counts[i] << "\t";
|
|
130 else
|
|
131 cout << counts[i];
|
|
132 }
|
|
133 cout << endl;
|
|
134 }
|