annotate BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.h @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 bedGraphFile.h
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2010 - Assaf Gordon
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 Licenced under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 #ifndef BEDGRAPHFILE_H
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #define BEDGRAPHFILE_H
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15 #include "gzstream.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16 #include "lineFileUtilities.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 #include "fileType.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 #include <vector>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19 #include <map>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 #include <set>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 #include <string>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22 #include <iostream>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 #include <fstream>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 #include <sstream>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25 #include <cstring>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26 #include <algorithm>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 #include <limits.h>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 #include <stdint.h>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29 #include <cstdio>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 using namespace std;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33 //*************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34 // Data type typedefs
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35 //*************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
// Coordinate type used for the start/end positions of a record.
// Note: #ifndef only tests preprocessor macros, so the typedef is skipped
// only when CHRPOS has been #define'd (not when it is already a typedef).
#ifndef CHRPOS
typedef uint32_t CHRPOS;
#endif

// Unsigned depth type; same macro-based guard as CHRPOS above.
#ifndef DEPTH
typedef uint32_t DEPTH;
#endif

/*
    Structure for regular BedGraph records.

    A record holds a chromosome name, a start and an end coordinate, and a
    "depth" value whose type T is chosen by the user of the template.
*/
template <typename T>
class BEDGRAPH
{
public:
    // The type of the depth column for this instantiation.
    typedef T DEPTH_TYPE;

    std::string chrom;
    CHRPOS      start;
    CHRPOS      end;
    T           depth;

    // constructors

    // Null: empty chromosome name, zero coordinates, value-initialized depth.
    BEDGRAPH() :
        chrom(),
        start(0),
        end(0),
        depth(T())
    {}

    // BEDGraph: build a record from its four columns.
    // The name and the depth are taken by const reference so that each is
    // copied exactly once (into the member), which matters when T is a string.
    BEDGRAPH(const std::string &_chrom, CHRPOS _start, CHRPOS _end, const T &_depth) :
        chrom(_chrom),
        start(_start),
        end(_end),
        depth(_depth)
    {}
}; // BEDGraph
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76 typedef BEDGRAPH<int32_t> BEDGRAPH_INT;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77 typedef BEDGRAPH<std::string> BEDGRAPH_STR;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78 typedef BEDGRAPH<double> BEDGRAPH_FLOAT;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80 template <typename T>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81 std::ostream& operator<< (std::ostream& strm, const BEDGRAPH<T>& bg)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83 strm << bg.chrom << "\t"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 << bg.start << "\t"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85 << bg.end << "\t"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86 << bg.depth;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 return strm;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
// Classification of a single line read from a BEDGraph file.
// Only the first enumerator is given an explicit value; the remaining ones
// follow consecutively (0, 1, 2).
enum BedGraphLineStatus
{
    BEDGRAPH_INVALID = -1,  // not a usable record
    BEDGRAPH_HEADER,        // 0
    BEDGRAPH_BLANK,         // 1
    BEDGRAPH_VALID          // 2
};
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100 //************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 // BedGraphFile Class methods and elements
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102 //************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 class BedGraphFile {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105 public:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 // Constructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108 BedGraphFile(string &);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110 // Destructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111 ~BedGraphFile(void);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113 // Open a BEDGraph file for reading (creates an istream pointer)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 void Open(void);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116 // Close an opened BED file.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 void Close(void);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119 // Get the next BED entry in an opened BED file.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120 template <typename T>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121 BedGraphLineStatus GetNextBedGraph (BEDGRAPH<T> &bedgraph, int &lineNum)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123 // make sure there are still lines to process.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124 // if so, tokenize, validate and return the BED entry.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125 if (_bedGraphStream->good()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126 string bedGraphLine;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
127 vector<string> bedGraphFields;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
128
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
129 // parse the bedStream pointer
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
130 getline(*_bedGraphStream, bedGraphLine);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
131 if (_bedGraphStream->eof())
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
132 return BEDGRAPH_INVALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
133 if (_bedGraphStream->bad()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
134 cerr << "Error while reading file '" << bedGraphFile << "' : "
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
135 << strerror(errno) << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
136 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
137 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
138 lineNum++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
139
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
140 // split into a string vector.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
141 Tokenize(bedGraphLine,bedGraphFields);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
142
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
143 // load the BED struct as long as it's a valid BED entry.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
144 return parseLine(bedgraph, bedGraphFields, lineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
145 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
146
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
147 // default if file is closed or EOF
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
148 return BEDGRAPH_INVALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
149 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
150
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
151 // the bedfile with which this instance is associated
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
152 string bedGraphFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
153
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
154 private:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
155 // data
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
156 istream *_bedGraphStream;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
157
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
158 template <typename T>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
159 BedGraphLineStatus parseLine (BEDGRAPH<T> &bg, const vector<string> &lineVector, int &lineNum)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
160 {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
161 if (lineVector.size() == 0)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
162 return BEDGRAPH_BLANK;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
163
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
164 if (lineVector[0].find("track") != string::npos ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
165 lineVector[0].find("browser") != string::npos ||
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
166 lineVector[0].find("#") != string::npos)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
167 return BEDGRAPH_HEADER;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
168
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
169 if (lineVector.size() != 4)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
170 return BEDGRAPH_INVALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
171
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
172 bg.chrom = lineVector[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
173
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
174 stringstream str_start(lineVector[1]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
175 if (! (str_start >> bg.start) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
176 cerr << "Input error, failed to extract start value from '" << lineVector[1]
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
177 << "' (column 2) in " << bedGraphFile << " line " << lineNum << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
178 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
179 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
180
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
181 stringstream str_end(lineVector[2]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
182 if (! (str_end >> bg.end) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
183 cerr << "Input error, failed to extract end value from '" << lineVector[2]
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
184 << "' (column 3) in " << bedGraphFile << " line " << lineNum << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
185 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
186 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
187
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
188 stringstream str_depth(lineVector[3]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
189 if (! (str_depth >> bg.depth) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
190 cerr << "Input error, failed to extract depth value from '" << lineVector[3]
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
191 << "' (column 4) in " << bedGraphFile << " line " << lineNum << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
192 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
193 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
194
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
195 return BEDGRAPH_VALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
196 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
197 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
198
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
199 #endif /* BEDGRAPHFILE_H */