annotate BEDTools-Version-2.14.3/src/genomeCoverageBed/genomeCoverageBed.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 genomeCoverage.cpp
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2009 - Aaron Quinlan
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 Licenced under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 #include "lineFileUtilities.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #include "genomeCoverageBed.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16 BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 bool eachBase, bool startSites,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 bool bedGraph, bool bedGraphAll,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19 int max, float scale,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 bool bamInput, bool obeySplits,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 bool filterByStrand, string requestedStrand,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22 bool only_5p_end, bool only_3p_end,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 bool eachBaseZeroBased,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 bool add_gb_track_line, string gb_track_line_opts) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26 _bedFile = bedFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 _genomeFile = genomeFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 _eachBase = eachBase;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29 _eachBaseZeroBased = eachBaseZeroBased;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30 _startSites = startSites;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 _bedGraph = bedGraph;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 _bedGraphAll = bedGraphAll;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33 _max = max;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34 _scale = scale;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35 _bamInput = bamInput;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36 _obeySplits = obeySplits;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37 _filterByStrand = filterByStrand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38 _requestedStrand = requestedStrand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39 _only_3p_end = only_3p_end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40 _only_5p_end = only_5p_end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41 _add_gb_track_line = add_gb_track_line;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42 _gb_track_line_opts = gb_track_line_opts;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43 _currChromName = "";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44 _currChromSize = 0 ;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47 if (_bamInput == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48 _genome = new GenomeFile(genomeFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51 PrintTrackDefinitionLine();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53 if (_bamInput == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54 _bed = new BedFile(bedFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55 CoverageBed();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58 CoverageBam(_bedFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62 void BedGenomeCoverage::PrintTrackDefinitionLine()
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64 //Print Track Definition line (if requested)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65 if ( (_bedGraph||_bedGraphAll) && _add_gb_track_line) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66 string line = "track type=bedGraph";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67 if (!_gb_track_line_opts.empty()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68 line += " " ;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69 line += _gb_track_line_opts ;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71 cout << line << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77 BedGenomeCoverage::~BedGenomeCoverage(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78 delete _bed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79 delete _genome;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83 void BedGenomeCoverage::ResetChromCoverage() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 _currChromName = "";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85 _currChromSize = 0 ;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86 std::vector<DEPTH>().swap(_currChromCoverage);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90 void BedGenomeCoverage::StartNewChrom(const string& newChrom) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91 // If we've moved beyond the first encountered chromosomes,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92 // process the results of the previous chromosome.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 if (_currChromName.length() > 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 ReportChromCoverage(_currChromCoverage, _currChromSize,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95 _currChromName, _currChromDepthHist);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98 // empty the previous chromosome and reserve new
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99 std::vector<DEPTH>().swap(_currChromCoverage);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 if (_visitedChromosomes.find(newChrom) != _visitedChromosomes.end()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102 cerr << "Input error: Chromosome " << _currChromName
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 << " found in non-sequential lines. This suggests that the input file is not sorted correctly." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106 _visitedChromosomes.insert(newChrom);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108 _currChromName = newChrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110 // get the current chrom size and allocate space
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111 _currChromSize = _genome->getChromSize(_currChromName);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113 if (_currChromSize >= 0)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 _currChromCoverage.resize(_currChromSize);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116 cerr << "Input error: Chromosome " << _currChromName << " found in your input file but not in your genome file." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122 void BedGenomeCoverage::AddCoverage(int start, int end) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123 // process the first line for this chromosome.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124 // make sure the coordinates fit within the chrom
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125 if (start < _currChromSize)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126 _currChromCoverage[start].starts++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
127 if (end < _currChromSize)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
128 _currChromCoverage[end].ends++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
129 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
130 _currChromCoverage[_currChromSize-1].ends++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
131 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
132
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
133
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
134 void BedGenomeCoverage::AddBlockedCoverage(const vector<BED> &bedBlocks) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
135 vector<BED>::const_iterator bedItr = bedBlocks.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
136 vector<BED>::const_iterator bedEnd = bedBlocks.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
137 for (; bedItr != bedEnd; ++bedItr) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
138 // the end - 1 must be done because BamAncillary::getBamBlocks
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
139 // returns ends uncorrected for the genomeCoverageBed data structure.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
140 // ugly, but necessary.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
141 AddCoverage(bedItr->start, bedItr->end - 1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
142 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
143 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
144
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
145
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
146 void BedGenomeCoverage::CoverageBed() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
147
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
148 BED a, nullBed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
149 int lineNum = 0; // current input line number
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
150 BedLineStatus bedStatus;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
151
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
152 ResetChromCoverage();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
153
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
154 _bed->Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
155 while ( (bedStatus = _bed->GetNextBed(a, lineNum)) != BED_INVALID ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
156 if (bedStatus == BED_VALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
157 if (_filterByStrand == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
158 if (a.strand.empty()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
159 cerr << "Input error: Interval is missing a strand value on line " << lineNum << "." <<endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
160 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
161 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
162 if ( ! (a.strand == "-" || a.strand == "+") ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
163 cerr << "Input error: Invalid strand value (" << a.strand << ") on line " << lineNum << "." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
164 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
165 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
166 // skip if the strand is not what the user requested.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
167 if (a.strand != _requestedStrand)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
168 continue;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
169 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
170
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
171 // are we on a new chromosome?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
172 if (a.chrom != _currChromName)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
173 StartNewChrom(a.chrom);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
174
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
175 if (_obeySplits == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
176 bedVector bedBlocks; // vec to store the discrete BED "blocks"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
177 splitBedIntoBlocks(a, lineNum, bedBlocks);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
178 AddBlockedCoverage(bedBlocks);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
179 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
180 else if (_only_5p_end) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
181 int pos = ( a.strand=="+" ) ? a.start : a.end-1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
182 AddCoverage(pos,pos);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
183 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
184 else if (_only_3p_end) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
185 int pos = ( a.strand=="-" ) ? a.start : a.end-1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
186 AddCoverage(pos,pos);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
187 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
188 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
189 AddCoverage(a.start, a.end-1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
190 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
191 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
192 _bed->Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
193 PrintFinalCoverage();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
194 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
195
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
196
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
197 void BedGenomeCoverage::PrintFinalCoverage()
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
198 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
199
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
200
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
201 // process the results of the last chromosome.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
202 ReportChromCoverage(_currChromCoverage, _currChromSize,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
203 _currChromName, _currChromDepthHist);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
204 if (_eachBase == false && _bedGraph == false && _bedGraphAll == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
205 ReportGenomeCoverage(_currChromDepthHist);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
206 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
207 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
208
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
209
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
210 void BedGenomeCoverage::CoverageBam(string bamFile) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
211
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
212 ResetChromCoverage();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
213
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
214 // open the BAM file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
215 BamReader reader;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
216 reader.Open(bamFile);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
217
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
218 // get header & reference information
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
219 string header = reader.GetHeaderText();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
220 RefVector refs = reader.GetReferenceData();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
221
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
222 // load the BAM header references into a BEDTools "genome file"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
223 _genome = new GenomeFile(refs);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
224 // convert each aligned BAM entry to BED
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
225 // and compute coverage on B
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
226 BamAlignment bam;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
227 while (reader.GetNextAlignment(bam)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
228 // skip if the read is unaligned
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
229 if (bam.IsMapped() == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
230 continue;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
231
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
232 // skip if we care about strands and the strand isn't what
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
233 // the user wanted
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
234 if ( (_filterByStrand == true) &&
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
235 ((_requestedStrand == "-") != bam.IsReverseStrand()) )
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
236 continue;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
237
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
238 // extract the chrom, start and end from the BAM alignment
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
239 string chrom(refs.at(bam.RefID).RefName);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
240 CHRPOS start = bam.Position;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
241 CHRPOS end = bam.GetEndPosition(false, false) - 1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
242
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
243 // are we on a new chromosome?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
244 if ( chrom != _currChromName )
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
245 StartNewChrom(chrom);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
246
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
247 // add coverage accordingly.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
248 if (_obeySplits) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
249 bedVector bedBlocks;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
250 // since we are counting coverage, we do want to split blocks when a
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
251 // deletion (D) CIGAR op is encountered (hence the true for the last parm)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
252 getBamBlocks(bam, refs, bedBlocks, true);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
253 AddBlockedCoverage(bedBlocks);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
254 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
255 else if (_only_5p_end) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
256 int pos = ( !bam.IsReverseStrand() ) ? start : end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
257 AddCoverage(pos,pos);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
258 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
259 else if (_only_3p_end) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
260 int pos = ( bam.IsReverseStrand() ) ? start : end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
261 AddCoverage(pos,pos);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
262 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
263 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
264 AddCoverage(start, end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
265 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
266 // close the BAM
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
267 reader.Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
268 PrintFinalCoverage();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
269 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
270
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
271
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
272 void BedGenomeCoverage::ReportChromCoverage(const vector<DEPTH> &chromCov, const int &chromSize, const string &chrom, chromHistMap &chromDepthHist) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
273
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
274 if (_eachBase) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
275 int depth = 0; // initialize the depth
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
276 int offset = (_eachBaseZeroBased)?0:1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
277 for (int pos = 0; pos < chromSize; pos++) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
278
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
279 depth += chromCov[pos].starts;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
280 // report the depth for this position.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
281 if (depth>0 || !_eachBaseZeroBased)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
282 cout << chrom << "\t" << pos+offset << "\t" << depth * _scale << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
283 depth = depth - chromCov[pos].ends;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
284 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
285 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
286 else if (_bedGraph == true || _bedGraphAll == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
287 ReportChromCoverageBedGraph(chromCov, chromSize, chrom);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
288 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
289 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
290
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
291 int depth = 0; // initialize the depth
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
292
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
293 for (int pos = 0; pos < chromSize; pos++) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
294
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
295 depth += chromCov[pos].starts;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
296
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
297 // add the depth at this position to the depth histogram
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
298 // for this chromosome. if the depth is greater than the
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
299 // maximum bin requested, then readjust the depth to be the max
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
300 if (depth >= _max) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
301 chromDepthHist[chrom][_max]++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
302 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
303 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
304 chromDepthHist[chrom][depth]++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
305 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
306 depth = depth - chromCov[pos].ends;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
307 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
308 // report the histogram for each chromosome
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
309 histMap::const_iterator depthIt = chromDepthHist[chrom].begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
310 histMap::const_iterator depthEnd = chromDepthHist[chrom].end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
311 for (; depthIt != depthEnd; ++depthIt) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
312 int depth = depthIt->first;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
313 unsigned int numBasesAtDepth = depthIt->second;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
314 cout << chrom << "\t" << depth << "\t" << numBasesAtDepth << "\t"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
315 << chromSize << "\t" << (float) ((float)numBasesAtDepth / (float)chromSize) << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
316 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
317 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
318 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
319
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
320
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
321
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
322 void BedGenomeCoverage::ReportGenomeCoverage(chromHistMap &chromDepthHist) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
323
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
324 // get the list of chromosome names in the genome
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
325 vector<string> chromList = _genome->getChromList();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
326
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
327 unsigned int genomeSize = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
328 vector<string>::const_iterator chromItr = chromList.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
329 vector<string>::const_iterator chromEnd = chromList.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
330 for (; chromItr != chromEnd; ++chromItr) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
331 string chrom = *chromItr;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
332 genomeSize += _genome->getChromSize(chrom);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
333 // if there were no reads for a give chromosome, then
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
334 // add the length of the chrom to the 0 bin.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
335 if ( chromDepthHist.find(chrom) == chromDepthHist.end() ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
336 chromDepthHist[chrom][0] += _genome->getChromSize(chrom);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
337 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
338 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
339
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
340 histMap genomeHist; // depth histogram for the entire genome
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
341
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
342 // loop through each chromosome and add the depth and number of bases at each depth
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
343 // to the aggregate histogram for the entire genome
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
344 for (chromHistMap::iterator chromIt = chromDepthHist.begin(); chromIt != chromDepthHist.end(); ++chromIt) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
345 string chrom = chromIt->first;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
346 for (histMap::iterator depthIt = chromDepthHist[chrom].begin(); depthIt != chromDepthHist[chrom].end(); ++depthIt) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
347 int depth = depthIt->first;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
348 unsigned int numBasesAtDepth = depthIt->second;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
349 genomeHist[depth] += numBasesAtDepth;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
350 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
351 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
352
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
353 // loop through the depths for the entire genome
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
354 // and report the number and fraction of bases in
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
355 // the entire genome that are at said depth.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
356 for (histMap::iterator genomeDepthIt = genomeHist.begin(); genomeDepthIt != genomeHist.end(); ++genomeDepthIt) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
357 int depth = genomeDepthIt->first;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
358 unsigned int numBasesAtDepth = genomeDepthIt->second;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
359
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
360 cout << "genome" << "\t" << depth << "\t" << numBasesAtDepth << "\t"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
361 << genomeSize << "\t" << (float) ((float)numBasesAtDepth / (float)genomeSize) << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
362 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
363 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
364
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
365
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
366 void BedGenomeCoverage::ReportChromCoverageBedGraph(const vector<DEPTH> &chromCov, const int &chromSize, const string &chrom) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
367
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
368 int depth = 0; // initialize the depth
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
369 int lastStart = -1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
370 int lastDepth = -1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
371
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
372 for (int pos = 0; pos < chromSize; pos++) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
373 depth += chromCov[pos].starts;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
374
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
375 if (depth != lastDepth) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
376 // Coverage depth has changed, print the last interval coverage (if any)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
377 // Print if:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
378 // (1) depth>0 (the default running mode),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
379 // (2) depth==0 and the user requested to print zero covered regions (_bedGraphAll)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
380 if ( (lastDepth != -1) && (lastDepth > 0 || _bedGraphAll) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
381 cout << chrom << "\t" << lastStart << "\t" << pos << "\t" << lastDepth * _scale << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
382 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
383 //Set current position as the new interval start + depth
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
384 lastDepth = depth;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
385 lastStart = pos;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
386 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
387 // Default: the depth has not changed, so we will not print anything.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
388 // Proceed until the depth changes.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
389 // Update depth
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
390 depth = depth - chromCov[pos].ends;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
391 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
392 //Print information about the last position
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
393 if ( (lastDepth != -1) && (lastDepth > 0 || _bedGraphAll) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
394 cout << chrom << "\t" << lastStart << "\t" << chromSize << "\t" << lastDepth * _scale << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
395 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
396 }