annotate BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 unionBedGraphs.cpp
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2010 - Assaf Gordon, CSHL
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 - Aaron Quinlan, UVA
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 Licenced under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #include <cassert>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14 #include <cstring>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15 #include <cstdlib>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16 #include <iostream>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 #include <algorithm>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19 #include "bedFile.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 #include "multiIntersectBed.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22 using namespace std;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25 MultiIntersectBed::MultiIntersectBed(std::ostream& _output,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26 const vector<string>& _filenames,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 const vector<string>& _titles,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 bool _print_empty_regions,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29 const std::string& _genome_size_filename,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30 const std::string& _no_coverage_value ) :
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 filenames(_filenames),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 titles(_titles),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33 output(_output),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34 current_non_zero_inputs(0),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35 print_empty_regions(_print_empty_regions),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36 haveTitles(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37 genome_sizes(NULL),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38 no_coverage_value(_no_coverage_value)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40 if (print_empty_regions) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41 assert(!_genome_size_filename.empty());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43 genome_sizes = new GenomeFile(_genome_size_filename);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46 if (titles.size() > 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47 haveTitles = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52 MultiIntersectBed::~MultiIntersectBed() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53 CloseFiles();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54 if (genome_sizes) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55 delete genome_sizes;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56 genome_sizes = NULL ;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61 void MultiIntersectBed::MultiIntersect() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62 OpenFiles();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64 // Add the first interval from each file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65 for(size_t i = 0;i < input_files.size(); ++i)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66 LoadNextItem(i);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68 // Chromosome loop - once per chromosome
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69 do {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70 // Find the first chromosome to use
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71 current_chrom = DetermineNextChrom();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73 // Populate the queue with initial values from all files
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74 // (if they belong to the correct chromosome)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75 for(size_t i = 0; i < input_files.size(); ++i)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76 AddInterval(i);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78 CHRPOS current_start = ConsumeNextCoordinate();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80 // User wanted empty regions, and the first coordinate is not 0 - print a dummy empty coverage
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81 if (print_empty_regions && current_start > 0)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 PrintEmptyCoverage(0,current_start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 // Intervals loop - until all intervals (of current chromosome) from all files are used.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85 do {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86 CHRPOS current_end = queue.top().coord;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 PrintCoverage(current_start, current_end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88 current_start = ConsumeNextCoordinate();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89 } while (!queue.empty());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91 // User wanted empty regions, and the last coordinate is not the last coordinate of the chromosome
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92 // print a dummy empty coverage
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 if (print_empty_regions) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 CHRPOS chrom_size = genome_sizes->getChromSize(current_chrom);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95 if (current_start < chrom_size)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96 PrintEmptyCoverage(current_start, chrom_size);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99 } while (!AllFilesDone());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 CHRPOS MultiIntersectBed::ConsumeNextCoordinate() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104 assert(!queue.empty());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106 CHRPOS new_position = queue.top().coord;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 do {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108 IntervalItem item = queue.top();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109 UpdateInformation(item);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110 queue.pop();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111 } while (!queue.empty() && queue.top().coord == new_position);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113 return new_position;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 void MultiIntersectBed::UpdateInformation(const IntervalItem &item) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118 // Update the depth coverage for this file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120 // Which coordinate is it - start or end?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121 switch (item.coord_type)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123 case START:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124 current_depth[item.source_index] = 1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125 current_non_zero_inputs++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126 files_with_coverage[item.source_index] = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
127 break;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
128 case END:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
129 //Read the next interval from this file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
130 AddInterval(item.source_index);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
131 current_depth[item.source_index] = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
132 current_non_zero_inputs--;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
133 files_with_coverage.erase(item.source_index);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
134 break;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
135 default:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
136 assert(0);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
137 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
138 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
139
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
140
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
141 void MultiIntersectBed::AddInterval(int index) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
142 assert(static_cast<unsigned int>(index) < input_files.size());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
143
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
144 //This file has no more intervals
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
145 if (current_item[index].chrom.empty())
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
146 return;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
147
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
148 //If the next interval belongs to a different chrom, don't add it
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
149 if (current_item[index].chrom!=current_chrom)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
150 return;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
151
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
152 const BED &bed(current_item[index]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
153
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
154 IntervalItem start_item(index, START, bed.start);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
155 IntervalItem end_item(index, END, bed.end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
156
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
157 queue.push(start_item);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
158 queue.push(end_item);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
159
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
160 LoadNextItem(index);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
161 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
162
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
163
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
164 void MultiIntersectBed::PrintHeader() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
165 output << "chrom\tstart\tend\tnum\tlist" ;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
166 for (size_t i=0;i<titles.size();++i)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
167 output << "\t" <<titles[i];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
168 output << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
169 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
170
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
171
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
172 void MultiIntersectBed::PrintCoverage(CHRPOS start, CHRPOS end) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
173 if ( current_non_zero_inputs == 0 && ! print_empty_regions )
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
174 return ;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
175
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
176 output << current_chrom << "\t"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
177 << start << "\t"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
178 << end << "\t"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
179 << current_non_zero_inputs << "\t";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
180
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
181 ostringstream file_list_string;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
182 ostringstream file_bool_string;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
183 int depth_count = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
184 for (size_t i = 0; i < current_depth.size(); ++i)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
185 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
186 if (current_depth[i] > 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
187 if (depth_count < current_non_zero_inputs - 1) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
188 if (!haveTitles)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
189 file_list_string << i+1 << ",";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
190 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
191 file_list_string << titles[i] << ",";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
192 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
193 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
194 if (!haveTitles)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
195 file_list_string << i+1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
196 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
197 file_list_string << titles[i];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
198 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
199 depth_count++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
200 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
201 file_bool_string << "\t" << current_depth[i];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
202 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
203 if (current_non_zero_inputs > 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
204 cout << file_list_string.str() << file_bool_string.str() << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
205 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
206 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
207 cout << "none" << file_bool_string.str() << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
208 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
209 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
210
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
211
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
212 void MultiIntersectBed::PrintEmptyCoverage(CHRPOS start, CHRPOS end) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
213 output << current_chrom << "\t"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
214 << start << "\t"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
215 << end << "\t"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
216 << "0" << "\t" << "none";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
217
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
218 for (size_t i=0;i<current_depth.size();++i)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
219 output << "\t0";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
220
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
221 output << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
222 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
223
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
224
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
225 void MultiIntersectBed::LoadNextItem(int index) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
226 assert(static_cast<unsigned int>(index) < input_files.size());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
227
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
228 current_item[index].chrom="";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
229
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
230 BedFile *file = input_files[index];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
231 BED merged_bed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
232 int lineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
233 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
234 // TO DO: Do the mergeing on the fly. How best to do this?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
235 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
236 // IDEA: Implement a Merge class with GetNextMerge element.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
237 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
238
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
239 while (file->GetNextMergedBed(merged_bed, lineNum))
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
240 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
241 current_item[index] = merged_bed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
242 break;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
243 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
244 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
245
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
246
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
247 bool MultiIntersectBed::AllFilesDone() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
248 for (size_t i=0;i<current_item.size();++i)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
249 if (!current_item[i].chrom.empty())
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
250 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
251 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
252 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
253
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
254
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
255 string MultiIntersectBed::DetermineNextChrom() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
256 string next_chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
257 for (size_t i=0;i<current_item.size();++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
258 if (current_item[i].chrom.empty())
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
259 continue;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
260
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
261 if (next_chrom.empty())
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
262 next_chrom = current_item[i].chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
263 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
264 if (current_item[i].chrom < next_chrom)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
265 next_chrom = current_item[i].chrom ;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
266 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
267 return next_chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
268 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
269
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
270
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
271 void MultiIntersectBed::OpenFiles() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
272 for (size_t i = 0; i < filenames.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
273 BedFile *file = new BedFile(filenames[i]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
274 file->Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
275 input_files.push_back(file);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
276 current_depth.push_back(0);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
277 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
278 current_item.resize(filenames.size());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
279 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
280
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
281
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
282 void MultiIntersectBed::CloseFiles() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
283 for (size_t i=0; i < input_files.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
284 BedFile *file = input_files[i];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
285 delete file;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
286 input_files[i] = NULL ;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
287 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
288 input_files.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
289 }