Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.h @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfcd8b6c1bda |
---|---|
1 /***************************************************************************** | |
2 bedGraphFile.h | |
3 | |
4 (c) 2010 - Assaf Gordon | |
5 Hall Laboratory | |
6 Department of Biochemistry and Molecular Genetics | |
7 University of Virginia | |
8 aaronquinlan@gmail.com | |
9 | |
10 Licensed under the GNU General Public License 2.0 license. | |
11 ******************************************************************************/ | |
12 #ifndef BEDGRAPHFILE_H | |
13 #define BEDGRAPHFILE_H | |
14 | |
15 #include "gzstream.h" | |
16 #include "lineFileUtilities.h" | |
17 #include "fileType.h" | |
18 #include <vector> | |
19 #include <map> | |
20 #include <set> | |
21 #include <string> | |
22 #include <iostream> | |
23 #include <fstream> | |
24 #include <sstream> | |
25 #include <cstring> | |
26 #include <algorithm> | |
27 #include <limits.h> | |
28 #include <stdint.h> | |
29 #include <cstdio> | |
30 | |
31 using namespace std; | |
32 | |
33 //************************************************* | |
34 // Data type typedefs | |
35 //************************************************* | |
// Type used for chromosome coordinates: an unsigned 32-bit integer.
// NOTE(review): #ifndef only tests preprocessor macros, so this guard does
// not notice a CHRPOS typedef supplied by another header; repeating an
// identical typedef is nevertheless legal C++.
#ifndef CHRPOS
    typedef uint32_t CHRPOS;
#endif

// Type alias for depth values: an unsigned 32-bit integer.
// The same macro-only caveat applies to this guard.
#ifndef DEPTH
    typedef uint32_t DEPTH;
#endif
43 | |
44 /* | |
45 Structure for regular BedGraph records | |
46 */ | |
47 template <typename T> | |
48 class BEDGRAPH | |
49 { | |
50 public: | |
51 std::string chrom; | |
52 CHRPOS start; | |
53 CHRPOS end; | |
54 T depth; | |
55 | |
56 public: | |
57 typedef T DEPTH_TYPE; | |
58 // constructors | |
59 | |
60 // Null | |
61 BEDGRAPH() : | |
62 start(0), | |
63 end(0), | |
64 depth(T()) | |
65 {} | |
66 | |
67 // BEDGraph | |
68 BEDGRAPH(string _chrom, CHRPOS _start, CHRPOS _end, T _depth) : | |
69 chrom(_chrom), | |
70 start(_start), | |
71 end(_end), | |
72 depth(_depth) | |
73 {} | |
74 }; // BEDGraph | |
75 | |
76 typedef BEDGRAPH<int32_t> BEDGRAPH_INT; | |
77 typedef BEDGRAPH<std::string> BEDGRAPH_STR; | |
78 typedef BEDGRAPH<double> BEDGRAPH_FLOAT; | |
79 | |
80 template <typename T> | |
81 std::ostream& operator<< (std::ostream& strm, const BEDGRAPH<T>& bg) | |
82 { | |
83 strm << bg.chrom << "\t" | |
84 << bg.start << "\t" | |
85 << bg.end << "\t" | |
86 << bg.depth; | |
87 return strm; | |
88 } | |
89 | |
// Classification of one line read from a BedGraph file.
enum BedGraphLineStatus
{
    BEDGRAPH_INVALID = -1,  // no line available, or the line is not a 4-column record
    BEDGRAPH_HEADER  =  0,  // track / browser / '#' line
    BEDGRAPH_BLANK   =  1,  // the line produced no fields
    BEDGRAPH_VALID   =  2   // a record was parsed successfully
};
98 | |
99 | |
100 //************************************************ | |
101 // BedGraphFile Class methods and elements | |
102 //************************************************ | |
103 class BedGraphFile { | |
104 | |
105 public: | |
106 | |
107 // Constructor | |
108 BedGraphFile(string &); | |
109 | |
110 // Destructor | |
111 ~BedGraphFile(void); | |
112 | |
113 // Open a BEDGraph file for reading (creates an istream pointer) | |
114 void Open(void); | |
115 | |
116 // Close an opened BED file. | |
117 void Close(void); | |
118 | |
119 // Get the next BED entry in an opened BED file. | |
120 template <typename T> | |
121 BedGraphLineStatus GetNextBedGraph (BEDGRAPH<T> &bedgraph, int &lineNum) | |
122 { | |
123 // make sure there are still lines to process. | |
124 // if so, tokenize, validate and return the BED entry. | |
125 if (_bedGraphStream->good()) { | |
126 string bedGraphLine; | |
127 vector<string> bedGraphFields; | |
128 | |
129 // parse the bedStream pointer | |
130 getline(*_bedGraphStream, bedGraphLine); | |
131 if (_bedGraphStream->eof()) | |
132 return BEDGRAPH_INVALID; | |
133 if (_bedGraphStream->bad()) { | |
134 cerr << "Error while reading file '" << bedGraphFile << "' : " | |
135 << strerror(errno) << endl; | |
136 exit(1); | |
137 } | |
138 lineNum++; | |
139 | |
140 // split into a string vector. | |
141 Tokenize(bedGraphLine,bedGraphFields); | |
142 | |
143 // load the BED struct as long as it's a valid BED entry. | |
144 return parseLine(bedgraph, bedGraphFields, lineNum); | |
145 } | |
146 | |
147 // default if file is closed or EOF | |
148 return BEDGRAPH_INVALID; | |
149 } | |
150 | |
151 // the bedfile with which this instance is associated | |
152 string bedGraphFile; | |
153 | |
154 private: | |
155 // data | |
156 istream *_bedGraphStream; | |
157 | |
158 template <typename T> | |
159 BedGraphLineStatus parseLine (BEDGRAPH<T> &bg, const vector<string> &lineVector, int &lineNum) | |
160 { | |
161 if (lineVector.size() == 0) | |
162 return BEDGRAPH_BLANK; | |
163 | |
164 if (lineVector[0].find("track") != string::npos || | |
165 lineVector[0].find("browser") != string::npos || | |
166 lineVector[0].find("#") != string::npos) | |
167 return BEDGRAPH_HEADER; | |
168 | |
169 if (lineVector.size() != 4) | |
170 return BEDGRAPH_INVALID; | |
171 | |
172 bg.chrom = lineVector[0]; | |
173 | |
174 stringstream str_start(lineVector[1]); | |
175 if (! (str_start >> bg.start) ) { | |
176 cerr << "Input error, failed to extract start value from '" << lineVector[1] | |
177 << "' (column 2) in " << bedGraphFile << " line " << lineNum << endl; | |
178 exit(1); | |
179 } | |
180 | |
181 stringstream str_end(lineVector[2]); | |
182 if (! (str_end >> bg.end) ) { | |
183 cerr << "Input error, failed to extract end value from '" << lineVector[2] | |
184 << "' (column 3) in " << bedGraphFile << " line " << lineNum << endl; | |
185 exit(1); | |
186 } | |
187 | |
188 stringstream str_depth(lineVector[3]); | |
189 if (! (str_depth >> bg.depth) ) { | |
190 cerr << "Input error, failed to extract depth value from '" << lineVector[3] | |
191 << "' (column 4) in " << bedGraphFile << " line " << lineNum << endl; | |
192 exit(1); | |
193 } | |
194 | |
195 return BEDGRAPH_VALID; | |
196 } | |
197 }; | |
198 | |
199 #endif /* BEDGRAPHFILE_H */ |