annotate BEDTools-Version-2.14.3/src/unionBedGraphs/unionBedGraphs.h @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 unionBedGraphs.h
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2010 - Assaf Gordon, CSHL
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 - Aaron Quinlan, UVA
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 Licenced under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #ifndef UNIONBEDGRAPHS_H
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14 #define UNIONBEDGRAPHS_H
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16 #include <vector>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 #include <string>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 #include "bedGraphFile.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19 #include "genomeFile.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 #include "intervalItem.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22 class UnionBedGraphs
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 private:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25 typedef BEDGRAPH_STR BEDGRAPH_TYPE;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 vector<string> filenames;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 vector<string> titles;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30 vector<BedGraphFile*> bedgraph_files;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 vector<BEDGRAPH_TYPE::DEPTH_TYPE> current_depth;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 vector<BEDGRAPH_TYPE> current_bedgraph_item;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34 std::ostream &output;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36 INTERVALS_PRIORITY_QUEUE queue;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37 std::string current_chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38 int current_non_zero_inputs;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39 bool print_empty_regions;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41 GenomeFile* genome_sizes;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43 std::string no_coverage_value;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45 public:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46 UnionBedGraphs(std::ostream& _output,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47 const vector<string>& _filenames,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48 const vector<string>& _titles,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49 bool _print_empty_regions,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50 const std::string& _genomeFileName,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51 const std::string& _no_coverage_value);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53 virtual ~UnionBedGraphs();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55 // Combines all bedgraph files
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56 void Union();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58 // Print the header line: chrom/start/end + name of each bedgraph file.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59 void PrintHeader();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62 private:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64 // Open all BedGraph files, initialize "current_XXX" vectors
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65 void OpenBedgraphFiles();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67 // Close the BedGraph files.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68 void CloseBedgraphFiles();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71 Add an interval from BedGraph file 'index' into the queue.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72 The interval will only be added if it belongs to the current chromosome.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74 If the interval was added (=consumed), the next interval will be read from the file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75 using 'LoadNextBedgraphItem'
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77 void AddInterval(int index);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80 Loads the next interval from BedGraph file 'index'.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81 Stores it in 'current_bedgraph_item' vector.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83 void LoadNextBedgraphItem(int index);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86 Scans the 'current_bedgraph_item' vector,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 and finds the 'first' chromosome to use (different BedGraph files can start with different chromosomes).
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89 std::string DetermineNextChrom();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92 Returns 'true' if ALL intervals from ALL BedGraph files were used
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 bool AllFilesDone();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97 Extracts the next coordinate from the queue, and updates the current coverage information.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98 If multiple intervals share the same coordinate value, all of them are handled.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99 If an END coordinate is consumed, the next interval (from the corresponding file) is read.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 CHRPOS ConsumeNextCoordinate();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104 Updates the coverage information based on the given item.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105 Item can be a START coordinate or an END coordinate.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 void UpdateInformation(const IntervalItem &item);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110 Prints chrom/start/end and the current depth coverage values of all the files.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112 void PrintCoverage(CHRPOS start, CHRPOS end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115 Prints chrom/start/end and the ZERO depth coverage values of all the files.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 void PrintEmptyCoverage(CHRPOS start, CHRPOS end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119 void DebugPrintQueue();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123 #endif