Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/utils/bedGraphFile/bedGraphFile.h @ 0:dfcd8b6c1bda
Uploaded
| author | aaronquinlan |
|---|---|
| date | Thu, 03 Nov 2011 10:25:04 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfcd8b6c1bda |
|---|---|
| 1 /***************************************************************************** | |
| 2 bedGraphFile.h | |
| 3 | |
| 4 (c) 2010 - Assaf Gordon | |
| 5 Hall Laboratory | |
| 6 Department of Biochemistry and Molecular Genetics | |
| 7 University of Virginia | |
| 8 aaronquinlan@gmail.com | |
| 9 | |
| 10 Licensed under the GNU General Public License 2.0 license. | |
| 11 ******************************************************************************/ | |
| 12 #ifndef BEDGRAPHFILE_H | |
| 13 #define BEDGRAPHFILE_H | |
| 14 | |
| 15 #include "gzstream.h" | |
| 16 #include "lineFileUtilities.h" | |
| 17 #include "fileType.h" | |
| 18 #include <vector> | |
| 19 #include <map> | |
| 20 #include <set> | |
| 21 #include <string> | |
| 22 #include <iostream> | |
| 23 #include <fstream> | |
| 24 #include <sstream> | |
| 25 #include <cstring> | |
| 26 #include <algorithm> | |
| 27 #include <limits.h> | |
| 28 #include <stdint.h> | |
| 29 #include <cstdio> | |
| 30 | |
| 31 using namespace std; | |
| 32 | |
//*************************************************
// Data type typedefs
//*************************************************

// Type used for the start/end coordinates of a record.
// The typedef is skipped when a preprocessor macro named CHRPOS is
// already defined (the check only sees macros, not other typedefs).
#if !defined(CHRPOS)
typedef uint32_t CHRPOS;
#endif

// Unsigned 32-bit depth type, skipped under the same macro rule as above.
#if !defined(DEPTH)
typedef uint32_t DEPTH;
#endif
| 43 | |
| 44 /* | |
| 45 Structure for regular BedGraph records | |
| 46 */ | |
| 47 template <typename T> | |
| 48 class BEDGRAPH | |
| 49 { | |
| 50 public: | |
| 51 std::string chrom; | |
| 52 CHRPOS start; | |
| 53 CHRPOS end; | |
| 54 T depth; | |
| 55 | |
| 56 public: | |
| 57 typedef T DEPTH_TYPE; | |
| 58 // constructors | |
| 59 | |
| 60 // Null | |
| 61 BEDGRAPH() : | |
| 62 start(0), | |
| 63 end(0), | |
| 64 depth(T()) | |
| 65 {} | |
| 66 | |
| 67 // BEDGraph | |
| 68 BEDGRAPH(string _chrom, CHRPOS _start, CHRPOS _end, T _depth) : | |
| 69 chrom(_chrom), | |
| 70 start(_start), | |
| 71 end(_end), | |
| 72 depth(_depth) | |
| 73 {} | |
| 74 }; // BEDGraph | |
| 75 | |
| 76 typedef BEDGRAPH<int32_t> BEDGRAPH_INT; | |
| 77 typedef BEDGRAPH<std::string> BEDGRAPH_STR; | |
| 78 typedef BEDGRAPH<double> BEDGRAPH_FLOAT; | |
| 79 | |
| 80 template <typename T> | |
| 81 std::ostream& operator<< (std::ostream& strm, const BEDGRAPH<T>& bg) | |
| 82 { | |
| 83 strm << bg.chrom << "\t" | |
| 84 << bg.start << "\t" | |
| 85 << bg.end << "\t" | |
| 86 << bg.depth; | |
| 87 return strm; | |
| 88 } | |
| 89 | |
// Classification of a single line read from a BEDGraph file.
// Only the first enumerator carries an explicit value; the rest follow
// consecutively, giving the values noted beside each one.
enum BedGraphLineStatus
{
    BEDGRAPH_INVALID = -1,  // -1
    BEDGRAPH_HEADER,        //  0
    BEDGRAPH_BLANK,         //  1
    BEDGRAPH_VALID          //  2
};
| 98 | |
| 99 | |
| 100 //************************************************ | |
| 101 // BedGraphFile Class methods and elements | |
| 102 //************************************************ | |
| 103 class BedGraphFile { | |
| 104 | |
| 105 public: | |
| 106 | |
| 107 // Constructor | |
| 108 BedGraphFile(string &); | |
| 109 | |
| 110 // Destructor | |
| 111 ~BedGraphFile(void); | |
| 112 | |
| 113 // Open a BEDGraph file for reading (creates an istream pointer) | |
| 114 void Open(void); | |
| 115 | |
| 116 // Close an opened BED file. | |
| 117 void Close(void); | |
| 118 | |
| 119 // Get the next BED entry in an opened BED file. | |
| 120 template <typename T> | |
| 121 BedGraphLineStatus GetNextBedGraph (BEDGRAPH<T> &bedgraph, int &lineNum) | |
| 122 { | |
| 123 // make sure there are still lines to process. | |
| 124 // if so, tokenize, validate and return the BED entry. | |
| 125 if (_bedGraphStream->good()) { | |
| 126 string bedGraphLine; | |
| 127 vector<string> bedGraphFields; | |
| 128 | |
| 129 // parse the bedStream pointer | |
| 130 getline(*_bedGraphStream, bedGraphLine); | |
| 131 if (_bedGraphStream->eof()) | |
| 132 return BEDGRAPH_INVALID; | |
| 133 if (_bedGraphStream->bad()) { | |
| 134 cerr << "Error while reading file '" << bedGraphFile << "' : " | |
| 135 << strerror(errno) << endl; | |
| 136 exit(1); | |
| 137 } | |
| 138 lineNum++; | |
| 139 | |
| 140 // split into a string vector. | |
| 141 Tokenize(bedGraphLine,bedGraphFields); | |
| 142 | |
| 143 // load the BED struct as long as it's a valid BED entry. | |
| 144 return parseLine(bedgraph, bedGraphFields, lineNum); | |
| 145 } | |
| 146 | |
| 147 // default if file is closed or EOF | |
| 148 return BEDGRAPH_INVALID; | |
| 149 } | |
| 150 | |
| 151 // the bedfile with which this instance is associated | |
| 152 string bedGraphFile; | |
| 153 | |
| 154 private: | |
| 155 // data | |
| 156 istream *_bedGraphStream; | |
| 157 | |
| 158 template <typename T> | |
| 159 BedGraphLineStatus parseLine (BEDGRAPH<T> &bg, const vector<string> &lineVector, int &lineNum) | |
| 160 { | |
| 161 if (lineVector.size() == 0) | |
| 162 return BEDGRAPH_BLANK; | |
| 163 | |
| 164 if (lineVector[0].find("track") != string::npos || | |
| 165 lineVector[0].find("browser") != string::npos || | |
| 166 lineVector[0].find("#") != string::npos) | |
| 167 return BEDGRAPH_HEADER; | |
| 168 | |
| 169 if (lineVector.size() != 4) | |
| 170 return BEDGRAPH_INVALID; | |
| 171 | |
| 172 bg.chrom = lineVector[0]; | |
| 173 | |
| 174 stringstream str_start(lineVector[1]); | |
| 175 if (! (str_start >> bg.start) ) { | |
| 176 cerr << "Input error, failed to extract start value from '" << lineVector[1] | |
| 177 << "' (column 2) in " << bedGraphFile << " line " << lineNum << endl; | |
| 178 exit(1); | |
| 179 } | |
| 180 | |
| 181 stringstream str_end(lineVector[2]); | |
| 182 if (! (str_end >> bg.end) ) { | |
| 183 cerr << "Input error, failed to extract end value from '" << lineVector[2] | |
| 184 << "' (column 3) in " << bedGraphFile << " line " << lineNum << endl; | |
| 185 exit(1); | |
| 186 } | |
| 187 | |
| 188 stringstream str_depth(lineVector[3]); | |
| 189 if (! (str_depth >> bg.depth) ) { | |
| 190 cerr << "Input error, failed to extract depth value from '" << lineVector[3] | |
| 191 << "' (column 4) in " << bedGraphFile << " line " << lineNum << endl; | |
| 192 exit(1); | |
| 193 } | |
| 194 | |
| 195 return BEDGRAPH_VALID; | |
| 196 } | |
| 197 }; | |
| 198 | |
| 199 #endif /* BEDGRAPHFILE_H */ |
