Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/multiBamCov/multiBamCov.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfcd8b6c1bda |
---|---|
1 /***************************************************************************** | |
2 multiBamCov.cpp | |
3 | |
4 (c) 2009 - Aaron Quinlan | |
5 Hall Laboratory | |
6 Department of Biochemistry and Molecular Genetics | |
7 University of Virginia | |
8 aaronquinlan@gmail.com | |
9 | |
10 Licenced under the GNU General Public License 2.0 license. | |
11 ******************************************************************************/ | |
12 #include "lineFileUtilities.h" | |
13 #include "multiBamCov.h" | |
14 #include "api/BamMultiReader.h" | |
15 | |
16 | |
17 /* | |
18 Constructor | |
19 */ | |
20 MultiCovBam::MultiCovBam(const vector<string> &bam_files, const string bed_file, | |
21 int minQual, bool properOnly, | |
22 bool keepDuplicates, bool keepFailedQC) | |
23 : | |
24 _bam_files(bam_files), | |
25 _bed_file(bed_file), | |
26 _minQual(minQual), | |
27 _properOnly(properOnly), | |
28 _keepDuplicates(keepDuplicates), | |
29 _keepFailedQC(keepFailedQC) | |
30 { | |
31 _bed = new BedFile(_bed_file); | |
32 LoadBamFileMap(); | |
33 } | |
34 | |
35 | |
36 /* | |
37 Destructor | |
38 */ | |
39 MultiCovBam::~MultiCovBam(void) | |
40 {} | |
41 | |
42 | |
43 | |
44 void MultiCovBam::CollectCoverage() | |
45 { | |
46 BamMultiReader reader; | |
47 | |
48 if ( !reader.Open(_bam_files) ) | |
49 { | |
50 cerr << "Could not open input BAM files." << endl; | |
51 exit(1); | |
52 } | |
53 else | |
54 { | |
55 // attempt to find index files | |
56 reader.LocateIndexes(); | |
57 | |
58 // if index data available for all BAM files, we can use SetRegion | |
59 if ( reader.HasIndexes() ) { | |
60 BED bed, nullBed; | |
61 int lineNum = 0; | |
62 BedLineStatus bedStatus; | |
63 | |
64 _bed->Open(); | |
65 // loop through each BED entry, jump to it, | |
66 // and collect coverage from each BAM | |
67 while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) | |
68 { | |
69 if (bedStatus == BED_VALID) | |
70 { | |
71 // initialize counts for each file to 0 | |
72 vector<int> counts(_bam_files.size(), 0); | |
73 // get the BAM refId for this chrom. | |
74 int refId = reader.GetReferenceID(bed.chrom); | |
75 // set up a BamRegion to which to attempt to jump | |
76 BamRegion region(refId, (int)bed.start, refId, (int)bed.end); | |
77 | |
78 // everything checks out, just iterate through specified region, counting alignments | |
79 if ( (refId != -1) && (reader.SetRegion(region)) ) { | |
80 BamAlignment al; | |
81 while ( reader.GetNextAlignment(al) ) | |
82 { | |
83 bool duplicate = al.IsDuplicate(); | |
84 bool failedQC = al.IsFailedQC(); | |
85 if (_keepDuplicates) duplicate = false; | |
86 if (_keepFailedQC) failedQC = false; | |
87 // map qual must exceed minimum | |
88 if ((al.MapQuality >= _minQual) && (!duplicate) && (!failedQC)) { | |
89 // ignore if not properly paired and we actually care. | |
90 if (_properOnly && !al.IsProperPair()) | |
91 continue; | |
92 | |
93 // lookup the offset of the file name and tabulate | |
94 //coverage for the appropriate file | |
95 counts[bamFileMap[al.Filename]]++; | |
96 } | |
97 } | |
98 } | |
99 // report the cov at this interval for each file and reset | |
100 _bed->reportBedTab(bed); | |
101 ReportCounts(counts); | |
102 bed = nullBed; | |
103 } | |
104 } | |
105 _bed->Close(); | |
106 } | |
107 else { | |
108 cerr << "Could not find indexes." << endl; | |
109 reader.Close(); | |
110 exit(1); | |
111 } | |
112 } | |
113 } | |
114 | |
115 | |
116 void MultiCovBam::LoadBamFileMap(void) | |
117 { | |
118 for (size_t i = 0; i < _bam_files.size(); ++i) | |
119 { | |
120 bamFileMap[_bam_files[i]] = i; | |
121 } | |
122 } | |
123 | |
124 void MultiCovBam::ReportCounts(const vector<int> &counts) | |
125 { | |
126 for (size_t i = 0; i < counts.size(); ++i) | |
127 { | |
128 if (i < counts.size() - 1) | |
129 cout << counts[i] << "\t"; | |
130 else | |
131 cout << counts[i]; | |
132 } | |
133 cout << endl; | |
134 } |