0
|
1 /*****************************************************************************
|
|
2 unionBedGraphs.h
|
|
3
|
|
4 (c) 2010 - Assaf Gordon, CSHL
|
|
5 - Aaron Quinlan, UVA
|
|
6 Hall Laboratory
|
|
7 Department of Biochemistry and Molecular Genetics
|
|
8 University of Virginia
|
|
9 aaronquinlan@gmail.com
|
|
10
|
|
11 Licenced under the GNU General Public License 2.0 license.
|
|
12 ******************************************************************************/
|
|
13 #ifndef UNIONBEDGRAPHS_H
|
|
14 #define UNIONBEDGRAPHS_H
|
|
15
|
|
16 #include <vector>
|
|
17 #include <string>
|
|
18 #include "bedGraphFile.h"
|
|
19 #include "genomeFile.h"
|
|
20 #include "intervalItem.h"
|
|
21
|
|
22 class UnionBedGraphs
|
|
23 {
|
|
24 private:
|
|
25 typedef BEDGRAPH_STR BEDGRAPH_TYPE;
|
|
26
|
|
27 vector<string> filenames;
|
|
28 vector<string> titles;
|
|
29
|
|
30 vector<BedGraphFile*> bedgraph_files;
|
|
31 vector<BEDGRAPH_TYPE::DEPTH_TYPE> current_depth;
|
|
32 vector<BEDGRAPH_TYPE> current_bedgraph_item;
|
|
33
|
|
34 std::ostream &output;
|
|
35
|
|
36 INTERVALS_PRIORITY_QUEUE queue;
|
|
37 std::string current_chrom;
|
|
38 int current_non_zero_inputs;
|
|
39 bool print_empty_regions;
|
|
40
|
|
41 GenomeFile* genome_sizes;
|
|
42
|
|
43 std::string no_coverage_value;
|
|
44
|
|
45 public:
|
|
46 UnionBedGraphs(std::ostream& _output,
|
|
47 const vector<string>& _filenames,
|
|
48 const vector<string>& _titles,
|
|
49 bool _print_empty_regions,
|
|
50 const std::string& _genomeFileName,
|
|
51 const std::string& _no_coverage_value);
|
|
52
|
|
53 virtual ~UnionBedGraphs();
|
|
54
|
|
55 // Combines all bedgraph files
|
|
56 void Union();
|
|
57
|
|
58 // Print the header line: chrom/start/end + name of each bedgraph file.
|
|
59 void PrintHeader();
|
|
60
|
|
61
|
|
62 private:
|
|
63
|
|
64 // Open all BedGraph files, initialize "current_XXX" vectors
|
|
65 void OpenBedgraphFiles();
|
|
66
|
|
67 // Close the BedGraph files.
|
|
68 void CloseBedgraphFiles();
|
|
69
|
|
70 /*
|
|
71 Add an interval from BedGraph file 'index' into the queue.
|
|
72 will only be added if it belongs to the current chromosome.
|
|
73
|
|
74 If the interval was added (=consumed), the next interval will be read from the file
|
|
75 using 'LoadNextBedgraphItem'
|
|
76 */
|
|
77 void AddInterval(int index);
|
|
78
|
|
79 /*
|
|
80 Loads the next interval from BedGraph file 'index'.
|
|
81 Stores it in 'current_bedgraph_item' vector.
|
|
82 */
|
|
83 void LoadNextBedgraphItem(int index);
|
|
84
|
|
85 /*
|
|
86 Scans the 'current_bedgraph_item' vector,
|
|
87 find the 'first' chromosome to use (different BedGraph files can start with different chromosomes).
|
|
88 */
|
|
89 std::string DetermineNextChrom();
|
|
90
|
|
91 /*
|
|
92 Returns 'true' if ALL intervals from ALL BedGraph files were used
|
|
93 */
|
|
94 bool AllFilesDone();
|
|
95
|
|
96 /*
|
|
97 Extract the next coordinate from the queue, and updates the current coverage information.
|
|
98 If multiple interval share the same coordinate values, all of them are handled.
|
|
99 If an END coordinate is consumed, the next interval (from the corresponding file) is read.
|
|
100 */
|
|
101 CHRPOS ConsumeNextCoordinate();
|
|
102
|
|
103 /*
|
|
104 Updates the coverage information based on the given item.
|
|
105 Item can be a START coordinate or an END coordiante.
|
|
106 */
|
|
107 void UpdateInformation(const IntervalItem &item);
|
|
108
|
|
109 /*
|
|
110 prints chrom/start/end and the current depth coverage values of all the files.
|
|
111 */
|
|
112 void PrintCoverage(CHRPOS start, CHRPOS end);
|
|
113
|
|
114 /*
|
|
115 prints chrom/start/end and the ZERO depth coverage values of all the files.
|
|
116 */
|
|
117 void PrintEmptyCoverage(CHRPOS start, CHRPOS end);
|
|
118
|
|
119 void DebugPrintQueue();
|
|
120 };
|
|
121
|
|
122
|
|
123 #endif
|