annotate BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 multiBamCov.cpp
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2009 - Aaron Quinlan
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 Licenced under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 #include "lineFileUtilities.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #include "multiBamCov.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14 #include "api/BamMultiReader.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 Constructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 MultiCovBam::MultiCovBam(const vector<string> &bam_files, const string bed_file,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 int minQual, bool properOnly,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22 bool keepDuplicates, bool keepFailedQC)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 :
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 _bam_files(bam_files),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25 _bed_file(bed_file),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26 _minQual(minQual),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 _properOnly(properOnly),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 _keepDuplicates(keepDuplicates),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29 _keepFailedQC(keepFailedQC)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 _bed = new BedFile(_bed_file);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 LoadBamFileMap();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37 Destructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39 MultiCovBam::~MultiCovBam(void)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40 {}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44 void MultiCovBam::CollectCoverage()
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46 BamMultiReader reader;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48 if ( !reader.Open(_bam_files) )
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50 cerr << "Could not open input BAM files." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55 // attempt to find index files
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56 reader.LocateIndexes();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58 // if index data available for all BAM files, we can use SetRegion
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59 if ( reader.HasIndexes() ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60 BED bed, nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61 int lineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62 BedLineStatus bedStatus;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64 _bed->Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65 // loop through each BED entry, jump to it,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66 // and collect coverage from each BAM
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67 while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69 if (bedStatus == BED_VALID)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71 // initialize counts for each file to 0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72 vector<int> counts(_bam_files.size(), 0);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73 // get the BAM refId for this chrom.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74 int refId = reader.GetReferenceID(bed.chrom);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75 // set up a BamRegion to which to attempt to jump
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76 BamRegion region(refId, (int)bed.start, refId, (int)bed.end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78 // everything checks out, just iterate through specified region, counting alignments
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79 if ( (refId != -1) && (reader.SetRegion(region)) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80 BamAlignment al;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81 while ( reader.GetNextAlignment(al) )
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83 bool duplicate = al.IsDuplicate();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 bool failedQC = al.IsFailedQC();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85 if (_keepDuplicates) duplicate = false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86 if (_keepFailedQC) failedQC = false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 // map qual must exceed minimum
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88 if ((al.MapQuality >= _minQual) && (!duplicate) && (!failedQC)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89 // ignore if not properly paired and we actually care.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90 if (_properOnly && !al.IsProperPair())
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91 continue;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 // lookup the offset of the file name and tabulate
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 //coverage for the appropriate file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95 counts[bamFileMap[al.Filename]]++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99 // report the cov at this interval for each file and reset
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100 _bed->reportBedTab(bed);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 ReportCounts(counts);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102 bed = nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105 _bed->Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108 cerr << "Could not find indexes." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109 reader.Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116 void MultiCovBam::LoadBamFileMap(void)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118 for (size_t i = 0; i < _bam_files.size(); ++i)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120 bamFileMap[_bam_files[i]] = i;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124 void MultiCovBam::ReportCounts(const vector<int> &counts)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126 for (size_t i = 0; i < counts.size(); ++i)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
127 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
128 if (i < counts.size() - 1)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
129 cout << counts[i] << "\t";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
130 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
131 cout << counts[i];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
132 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
133 cout << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
134 }