0
|
1 /*****************************************************************************
|
|
2 bedGraphFile.h
|
|
3
|
|
4 (c) 2010 - Assaf Gordon
|
|
5 Hall Laboratory
|
|
6 Department of Biochemistry and Molecular Genetics
|
|
7 University of Virginia
|
|
8 aaronquinlan@gmail.com
|
|
9
|
|
10 Licenced under the GNU General Public License 2.0 license.
|
|
11 ******************************************************************************/
|
|
12 #ifndef BEDGRAPHFILE_H
|
|
13 #define BEDGRAPHFILE_H
|
|
14
|
|
15 #include "gzstream.h"
|
|
16 #include "lineFileUtilities.h"
|
|
17 #include "fileType.h"
|
|
18 #include <vector>
|
|
19 #include <map>
|
|
20 #include <set>
|
|
21 #include <string>
|
|
22 #include <iostream>
|
|
23 #include <fstream>
|
|
24 #include <sstream>
|
|
25 #include <cstring>
|
|
26 #include <algorithm>
|
|
27 #include <limits.h>
|
|
28 #include <stdint.h>
|
|
29 #include <cstdio>
|
|
30
|
|
31 using namespace std;
|
|
32
|
|
//*************************************************
// Data type typedefs
//*************************************************

// Unsigned 32-bit type used for the start/end coordinates of a record.
// NOTE: #ifndef only tests preprocessor macros, so this guard is skipped
// only when CHRPOS has been #define'd; it does not detect a typedef of the
// same name made by another header.
#ifndef CHRPOS
typedef uint32_t CHRPOS;
#endif

// Unsigned 32-bit type named DEPTH, guarded the same way as CHRPOS.
// It is not referenced anywhere else in this header.
#ifndef DEPTH
typedef uint32_t DEPTH;
#endif
|
|
43
|
|
44 /*
|
|
45 Structure for regular BedGraph records
|
|
46 */
|
|
47 template <typename T>
|
|
48 class BEDGRAPH
|
|
49 {
|
|
50 public:
|
|
51 std::string chrom;
|
|
52 CHRPOS start;
|
|
53 CHRPOS end;
|
|
54 T depth;
|
|
55
|
|
56 public:
|
|
57 typedef T DEPTH_TYPE;
|
|
58 // constructors
|
|
59
|
|
60 // Null
|
|
61 BEDGRAPH() :
|
|
62 start(0),
|
|
63 end(0),
|
|
64 depth(T())
|
|
65 {}
|
|
66
|
|
67 // BEDGraph
|
|
68 BEDGRAPH(string _chrom, CHRPOS _start, CHRPOS _end, T _depth) :
|
|
69 chrom(_chrom),
|
|
70 start(_start),
|
|
71 end(_end),
|
|
72 depth(_depth)
|
|
73 {}
|
|
74 }; // BEDGraph
|
|
75
|
|
76 typedef BEDGRAPH<int32_t> BEDGRAPH_INT;
|
|
77 typedef BEDGRAPH<std::string> BEDGRAPH_STR;
|
|
78 typedef BEDGRAPH<double> BEDGRAPH_FLOAT;
|
|
79
|
|
80 template <typename T>
|
|
81 std::ostream& operator<< (std::ostream& strm, const BEDGRAPH<T>& bg)
|
|
82 {
|
|
83 strm << bg.chrom << "\t"
|
|
84 << bg.start << "\t"
|
|
85 << bg.end << "\t"
|
|
86 << bg.depth;
|
|
87 return strm;
|
|
88 }
|
|
89
|
|
// Classification of one line read from a BEDGraph file.
enum BedGraphLineStatus
{
    BEDGRAPH_INVALID = -1,  // wrong number of columns, or nothing could be read
    BEDGRAPH_HEADER  =  0,  // first token contains "track", "browser" or "#"
    BEDGRAPH_BLANK   =  1,  // line produced no tokens
    BEDGRAPH_VALID   =  2   // all four columns were parsed into a record
};
|
|
98
|
|
99
|
|
100 //************************************************
|
|
101 // BedGraphFile Class methods and elements
|
|
102 //************************************************
|
|
103 class BedGraphFile {
|
|
104
|
|
105 public:
|
|
106
|
|
107 // Constructor
|
|
108 BedGraphFile(string &);
|
|
109
|
|
110 // Destructor
|
|
111 ~BedGraphFile(void);
|
|
112
|
|
113 // Open a BEDGraph file for reading (creates an istream pointer)
|
|
114 void Open(void);
|
|
115
|
|
116 // Close an opened BED file.
|
|
117 void Close(void);
|
|
118
|
|
119 // Get the next BED entry in an opened BED file.
|
|
120 template <typename T>
|
|
121 BedGraphLineStatus GetNextBedGraph (BEDGRAPH<T> &bedgraph, int &lineNum)
|
|
122 {
|
|
123 // make sure there are still lines to process.
|
|
124 // if so, tokenize, validate and return the BED entry.
|
|
125 if (_bedGraphStream->good()) {
|
|
126 string bedGraphLine;
|
|
127 vector<string> bedGraphFields;
|
|
128
|
|
129 // parse the bedStream pointer
|
|
130 getline(*_bedGraphStream, bedGraphLine);
|
|
131 if (_bedGraphStream->eof())
|
|
132 return BEDGRAPH_INVALID;
|
|
133 if (_bedGraphStream->bad()) {
|
|
134 cerr << "Error while reading file '" << bedGraphFile << "' : "
|
|
135 << strerror(errno) << endl;
|
|
136 exit(1);
|
|
137 }
|
|
138 lineNum++;
|
|
139
|
|
140 // split into a string vector.
|
|
141 Tokenize(bedGraphLine,bedGraphFields);
|
|
142
|
|
143 // load the BED struct as long as it's a valid BED entry.
|
|
144 return parseLine(bedgraph, bedGraphFields, lineNum);
|
|
145 }
|
|
146
|
|
147 // default if file is closed or EOF
|
|
148 return BEDGRAPH_INVALID;
|
|
149 }
|
|
150
|
|
151 // the bedfile with which this instance is associated
|
|
152 string bedGraphFile;
|
|
153
|
|
154 private:
|
|
155 // data
|
|
156 istream *_bedGraphStream;
|
|
157
|
|
158 template <typename T>
|
|
159 BedGraphLineStatus parseLine (BEDGRAPH<T> &bg, const vector<string> &lineVector, int &lineNum)
|
|
160 {
|
|
161 if (lineVector.size() == 0)
|
|
162 return BEDGRAPH_BLANK;
|
|
163
|
|
164 if (lineVector[0].find("track") != string::npos ||
|
|
165 lineVector[0].find("browser") != string::npos ||
|
|
166 lineVector[0].find("#") != string::npos)
|
|
167 return BEDGRAPH_HEADER;
|
|
168
|
|
169 if (lineVector.size() != 4)
|
|
170 return BEDGRAPH_INVALID;
|
|
171
|
|
172 bg.chrom = lineVector[0];
|
|
173
|
|
174 stringstream str_start(lineVector[1]);
|
|
175 if (! (str_start >> bg.start) ) {
|
|
176 cerr << "Input error, failed to extract start value from '" << lineVector[1]
|
|
177 << "' (column 2) in " << bedGraphFile << " line " << lineNum << endl;
|
|
178 exit(1);
|
|
179 }
|
|
180
|
|
181 stringstream str_end(lineVector[2]);
|
|
182 if (! (str_end >> bg.end) ) {
|
|
183 cerr << "Input error, failed to extract end value from '" << lineVector[2]
|
|
184 << "' (column 3) in " << bedGraphFile << " line " << lineNum << endl;
|
|
185 exit(1);
|
|
186 }
|
|
187
|
|
188 stringstream str_depth(lineVector[3]);
|
|
189 if (! (str_depth >> bg.depth) ) {
|
|
190 cerr << "Input error, failed to extract depth value from '" << lineVector[3]
|
|
191 << "' (column 4) in " << bedGraphFile << " line " << lineNum << endl;
|
|
192 exit(1);
|
|
193 }
|
|
194
|
|
195 return BEDGRAPH_VALID;
|
|
196 }
|
|
197 };
|
|
198
|
|
199 #endif /* BEDGRAPHFILE_H */
|