annotate BEDTools-Version-2.14.3/src/multiIntersectBed/multiIntersectBed.h @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 multiIntersectBed.h
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2010 - Aaron Quinlan, UVA
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 - Assaf Gordon, CSHL
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Quinlan Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 Department of Public Health Sciences
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 Center for Public Health Genomics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 Licensed under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14 #ifndef MULTIINTERSECTBED_H
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15 #define MULTIINTERSECTBED_H
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 #include <vector>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 #include <string>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19 #include "bedFile.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 #include "genomeFile.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 #include "intervalItem.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 class MultiIntersectBed
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25 private:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 vector<string> filenames;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 vector<string> titles;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30 vector<BedFile*> input_files;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 vector<int> current_depth;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 vector<BED> current_item;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34 std::ostream &output;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36 INTERVALS_PRIORITY_QUEUE queue;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37 std::string current_chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38 map<int, bool> files_with_coverage;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39 int current_non_zero_inputs;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40 bool print_empty_regions;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41 bool haveTitles;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43 GenomeFile* genome_sizes;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45 std::string no_coverage_value;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47 public:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48 MultiIntersectBed(std::ostream& _output,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49 const vector<string>& _filenames,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50 const vector<string>& _titles,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51 bool _print_empty_regions,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52 const std::string& _genomeFileName,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53 const std::string& _no_coverage_value);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55 virtual ~MultiIntersectBed();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57 // Combines all interval files
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58 void MultiIntersect();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60 // Print the header line: chrom/start/end + name of each input file.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61 void PrintHeader();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64 private:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66 // Open all input files, initialize "current_XXX" vectors
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67 void OpenFiles();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69 // Close the input files.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70 void CloseFiles();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73 Add an interval from input file 'index' into the queue.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74 The interval will only be added if it belongs to the current chromosome.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76 If the interval was added (=consumed), the next interval will be read from the file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77 using 'LoadNextItem'
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79 void AddInterval(int index);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 Loads the next interval from Bed file 'index'.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83 Stores it in the 'current_item' vector.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85 void LoadNextItem(int index);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88 Scans the 'current_item' vector,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89 finds the 'first' chromosome to use (different input files can start with different chromosomes).
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91 std::string DetermineNextChrom();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 Returns 'true' if ALL intervals from ALL input files were used
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96 bool AllFilesDone();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99 Extracts the next coordinate from the queue, and updates the current coverage information.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100 If multiple intervals share the same coordinate value, all of them are handled.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 If an END coordinate is consumed, the next interval (from the corresponding file) is read.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 CHRPOS ConsumeNextCoordinate();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106 Updates the coverage information based on the given item.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 Item can be a START coordinate or an END coordinate.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109 void UpdateInformation(const IntervalItem &item);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112 prints chrom/start/end and the current depth coverage values of all the files.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 void PrintCoverage(CHRPOS start, CHRPOS end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 prints chrom/start/end and the ZERO depth coverage values of all the files.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119 void PrintEmptyCoverage(CHRPOS start, CHRPOS end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121 void DebugPrintQueue();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125 #endif