annotate BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 bedFile.h
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2009 - Aaron Quinlan
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 Licensed under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 #ifndef BEDFILE_H
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #define BEDFILE_H
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15 // "local" includes
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16 #include "gzstream.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 #include "lineFileUtilities.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 #include "fileType.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 // standard includes
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 #include <vector>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22 #include <map>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 #include <set>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 #include <string>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25 #include <iostream>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26 #include <fstream>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 #include <sstream>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 #include <cstring>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29 #include <algorithm>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30 #include <limits.h>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 #include <stdint.h>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 #include <cstdio>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33 //#include <tr1/unordered_map> // Experimental.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34 using namespace std;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
//*************************************************
// Data type typedefs
//*************************************************
typedef uint32_t CHRPOS;    // coordinate type used for feature start/end
typedef uint16_t BINLEVEL;  // index of a level in the binning hierarchy
typedef uint32_t BIN;       // bin number
typedef uint16_t USHORT;
typedef uint32_t UINT;

//*************************************************
// Genome binning constants
//*************************************************

// NOTE(review): 37450 is 1+8+64+512+4096+32768 plus one, i.e. it matches a
// six-level hierarchy; with the seven offsets below, finest-level bin numbers
// start at 37449 and run past this value. Confirm how _numBins is used.
const BIN      _numBins   = 37450;
const BINLEVEL _binLevels = 7;

// Bin layout implied by the offsets and shifts below.  Level i (the index
// into _binOffsetsExtended) covers 2^(_binFirstShift + i*_binNextShift) bp
// per bin, and each level holds 8x as many bins as the next coarser one:
//   Level 6: bin  0             spans   4Gbp
//   Level 5: bins 1-8           span  512Mbp
//   Level 4: bins 9-72          span   64Mbp
//   Level 3: bins 73-584        span    8Mbp
//   Level 2: bins 585-4680      span    1Mbp
//   Level 1: bins 4681-37448    span  128Kbp
//   Level 0: bins 37449 and up  span   16Kbp
//
// The first entry previously read 32678+..., a digit transposition of
// 32768 (8^5) that made the finest level start 90 bins too early and
// overlap the tail of level 1.
const BIN _binOffsetsExtended[] = {32768+4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};

const USHORT _binFirstShift = 14;   /* How much to shift to get to finest bin. */
const USHORT _binNextShift  = 3;    /* How much to shift to get to next larger bin. */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67 //*************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68 // Common data structures
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69 //*************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71 struct DEPTH {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72 UINT starts;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73 UINT ends;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78 Structure for regular BED records
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80 struct BED {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 // Regular BED fields
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83 string chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 CHRPOS start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85 CHRPOS end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86 string name;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 string score;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88 string strand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90 // Add'l fields for BED12 and/or custom BED annotations
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91 vector<string> otherFields;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 // experimental fields for the FJOIN approach.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 bool zeroLength;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95 bool added;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96 bool finished;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97 // list of hits from another file.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98 vector<BED> overlaps;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100 public:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 // constructors
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 // Null
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104 BED()
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105 : chrom(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106 start(0),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 end(0),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108 name(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109 score(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110 strand(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111 otherFields(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112 zeroLength(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113 added(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 finished(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115 overlaps()
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116 {}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118 // BED3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119 BED(string chrom, CHRPOS start, CHRPOS end)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120 : chrom(chrom),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121 start(start),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122 end(end),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123 name(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124 score(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125 strand(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126 otherFields(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
127 zeroLength(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
128 added(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
129 finished(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
130 overlaps()
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
131 {}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
132
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
133 // BED4
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
134 BED(string chrom, CHRPOS start, CHRPOS end, string strand)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
135 : chrom(chrom),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
136 start(start),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
137 end(end),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
138 name(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
139 score(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
140 strand(strand),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
141 otherFields(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
142 zeroLength(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
143 added(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
144 finished(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
145 overlaps()
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
146 {}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
147
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
148 // BED6
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
149 BED(string chrom, CHRPOS start, CHRPOS end, string name,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
150 string score, string strand)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
151 : chrom(chrom),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
152 start(start),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
153 end(end),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
154 name(name),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
155 score(score),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
156 strand(strand),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
157 otherFields(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
158 zeroLength(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
159 added(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
160 finished(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
161 overlaps()
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
162 {}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
163
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
164 // BEDALL
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
165 BED(string chrom, CHRPOS start, CHRPOS end, string name,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
166 string score, string strand, vector<string> otherFields)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
167 : chrom(chrom),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
168 start(start),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
169 end(end),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
170 name(name),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
171 score(score),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
172 strand(strand),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
173 otherFields(otherFields),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
174 zeroLength(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
175 added(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
176 finished(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
177 overlaps()
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
178 {}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
179
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
180 int size() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
181 return end-start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
182 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
183
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
184 }; // BED
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
185
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
186
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
187 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
188 Structure for each end of a paired BED record
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
189 mate points to the other end.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
190 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
191 struct MATE {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
192 BED bed;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
193 int lineNum;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
194 MATE *mate;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
195 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
196
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
197
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
198 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
199 Structure for regular BED COVERAGE records
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
200 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
201 struct BEDCOV {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
202
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
203 string chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
204
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
205 // Regular BED fields
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
206 CHRPOS start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
207 CHRPOS end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
208 string name;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
209 string score;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
210 string strand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
211
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
212 // Add'l fields for BED12 and/or custom BED annotations
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
213 vector<string> otherFields;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
214
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
215 // flag a zero-length feature
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
216 bool zeroLength;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
217
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
218 // Additional fields specific to computing coverage
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
219 map<unsigned int, DEPTH> depthMap;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
220 unsigned int count;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
221 CHRPOS minOverlapStart;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
222
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
223
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
224 public:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
225 // constructors
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
226 // Null
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
227 BEDCOV()
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
228 : chrom(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
229 start(0),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
230 end(0),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
231 name(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
232 score(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
233 strand(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
234 otherFields(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
235 zeroLength(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
236 depthMap(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
237 count(0),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
238 minOverlapStart(0)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
239 {}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
240 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
241
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
242
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
243 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
244 Structure for BED COVERAGE records having lists of
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
245 multiple coverages
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
246 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
247 struct BEDCOVLIST {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
248
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
249 // Regular BED fields
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
250 string chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
251 CHRPOS start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
252 CHRPOS end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
253 string name;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
254 string score;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
255 string strand;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
256
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
257 // Add'l fields for BED12 and/or custom BED annotations
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
258 vector<string> otherFields;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
259
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
260 // flag a zero-length feature
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
261 bool zeroLength;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
262
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
263 // Additional fields specific to computing coverage
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
264 vector< map<unsigned int, DEPTH> > depthMapList;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
265 vector<unsigned int> counts;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
266 vector<CHRPOS> minOverlapStarts;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
267
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
268
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
269 public:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
270 // constructors
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
271 // Null
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
272 BEDCOVLIST()
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
273 : chrom(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
274 start(0),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
275 end(0),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
276 name(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
277 score(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
278 strand(""),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
279 otherFields(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
280 zeroLength(false),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
281 depthMapList(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
282 counts(0),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
283 minOverlapStarts(0)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
284 {}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
285 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
286
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
287
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
// Status codes used to flag the state of a given line in a BED file.
// The numeric values are spelled out explicitly and must not change.
enum BedLineStatus
{
    BED_INVALID = -1,   // line flagged as invalid
    BED_HEADER  =  0,   // line flagged as a header
    BED_BLANK   =  1,   // line flagged as blank
    BED_VALID   =  2    // line flagged as a valid record
};
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
296
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
// Kind of file being processed.  Values are written out here but are the
// same ones the compiler assigns by default (0, 1, 2).
enum FileType
{
    BED_FILETYPE = 0,
    GFF_FILETYPE = 1,
    VCF_FILETYPE = 2
};
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
304
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
305 //*************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
306 // Data structure typedefs
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
307 //*************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
308 typedef vector<BED> bedVector;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
309 typedef vector<BEDCOV> bedCovVector;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
310 typedef vector<MATE> mateVector;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
311 typedef vector<BEDCOVLIST> bedCovListVector;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
312
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
313 typedef map<BIN, bedVector, std::less<BIN> > binsToBeds;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
314 typedef map<BIN, bedCovVector, std::less<BIN> > binsToBedCovs;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
315 typedef map<BIN, mateVector, std::less<BIN> > binsToMates;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
316 typedef map<BIN, bedCovListVector, std::less<BIN> > binsToBedCovLists;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
317
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
318 typedef map<string, binsToBeds, std::less<string> > masterBedMap;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
319 typedef map<string, binsToBedCovs, std::less<string> > masterBedCovMap;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
320 typedef map<string, binsToMates, std::less<string> > masterMateMap;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
321 typedef map<string, binsToBedCovLists, std::less<string> > masterBedCovListMap;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
322 typedef map<string, bedVector, std::less<string> > masterBedMapNoBin;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
323
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
324
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
325 // EXPERIMENTAL - wait for TR1
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
326 // typedef vector<BED> bedVector;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
327 // typedef vector<BEDCOV> bedCovVector;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
328 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
329 // typedef tr1::unordered_map<BIN, bedVector> binsToBeds;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
330 // typedef tr1::unordered_map<BIN, bedCovVector> binsToBedCovs;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
331 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
332 // typedef tr1::unordered_map<string, binsToBeds> masterBedMap;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
333 // typedef tr1::unordered_map<string, binsToBedCovs> masterBedCovMap;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
334 // typedef tr1::unordered_map<string, bedVector> masterBedMapNoBin;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
335
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
336
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
337
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
338 // return the genome "bin" for a feature with this start and end
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
339 inline
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
340 BIN getBin(CHRPOS start, CHRPOS end) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
341 --end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
342 start >>= _binFirstShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
343 end >>= _binFirstShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
344
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
345 for (register short i = 0; i < _binLevels; ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
346 if (start == end) return _binOffsetsExtended[i] + start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
347 start >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
348 end >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
349 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
350 cerr << "start " << start << ", end " << end << " out of range in findBin (max is 512M)" << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
351 return 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
352 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
353
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
/****************************************************
// isInteger(s): Tests if string s consists solely of
// decimal digits.
//
// Returns true when every character of s is a digit.
// A sign, whitespace or any other character makes the
// result false.  The empty string yields true (there is
// no offending character), which is the historical
// behaviour and is kept for existing callers.
*****************************************************/
inline bool isInteger(const std::string& s) {
    for (std::string::size_type pos = 0; pos < s.length(); ++pos) {
        // std::isdigit requires a value representable as unsigned char;
        // passing a negative plain char is undefined behaviour.
        if (!std::isdigit(static_cast<unsigned char>(s[pos]))) return false;
    }
    return true;
}
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
364
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
365
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
366 // return the amount of overlap between two features. Negative if none and the the
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
367 // number of negative bases is the distance between the two.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
368 inline
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
369 int overlaps(CHRPOS aS, CHRPOS aE, CHRPOS bS, CHRPOS bE) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
370 return min(aE, bE) - max(aS, bS);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
371 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
372
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
373 // is A after (to the right of) B?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
374 inline
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
375 bool after(const BED &a, const BED &b) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
376 return (a.start >= b.end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
377 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
378
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
379
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
380 // Ancillary functions
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
381 void splitBedIntoBlocks(const BED &bed, int lineNum, bedVector &bedBlocks);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
382
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
383
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
384 // BED Sorting Methods
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
385 bool sortByChrom(const BED &a, const BED &b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
386 bool sortByStart(const BED &a, const BED &b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
387 bool sortBySizeAsc(const BED &a, const BED &b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
388 bool sortBySizeDesc(const BED &a, const BED &b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
389 bool sortByScoreAsc(const BED &a, const BED &b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
390 bool sortByScoreDesc(const BED &a, const BED &b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
391 bool byChromThenStart(BED const &a, BED const &b);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
392
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
393
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
394
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
395 //************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
396 // BedFile Class methods and elements
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
397 //************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
398 class BedFile {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
399
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
400 public:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
401
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
402 // Constructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
403 BedFile(string &);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
404
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
405 // Destructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
406 ~BedFile(void);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
407
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
408 // Open a BED file for reading (creates an istream pointer)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
409 void Open(void);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
410
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
411 // Rewind the pointer back to the beginning of the file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
412 void Rewind(void);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
413
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
414 // Jump to a specific byte in the file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
415 void Seek(unsigned long offset);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
416
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
417 bool Empty();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
418
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
419 // Close an opened BED file.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
420 void Close(void);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
421
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
422 // Get the next BED entry in an opened BED file.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
423 BedLineStatus GetNextBed (BED &bed, int &lineNum, bool forceSorted = false);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
424
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
425 // Returns the next MERGED (i.e., non-overlapping) interval in an opened BED file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
426 // NOTE: assumes input file is sorted by chrom then start
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
427 bool GetNextMergedBed(BED &merged_bed, int &lineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
428
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
429 // load a BED file into a map keyed by chrom, then bin. value is vector of BEDs
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
430 void loadBedFileIntoMap();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
431
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
432 // load a BED file into a map keyed by chrom, then bin. value is vector of BEDCOVs
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
433 void loadBedCovFileIntoMap();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
434
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
435 // load a BED file into a map keyed by chrom, then bin. value is vector of BEDCOVLISTs
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
436 void loadBedCovListFileIntoMap();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
437
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
438 // load a BED file into a map keyed by chrom. value is vector of BEDs
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
439 void loadBedFileIntoMapNoBin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
440
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
441 // Given a chrom, start, end and strand for a single feature,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
442 // search for all overlapping features in another BED file.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
443 // Searches through each relevant genome bin on the same chromosome
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
444 // as the single feature. Note: Adapted from kent source "binKeeperFind"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
445 void FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, vector<BED> &hits, bool sameStrand, bool diffStrand);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
446
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
447 // return true if at least one overlap was found. otherwise, return false.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
448 bool FindOneOrMoreOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
449 bool sameStrand, bool diffStrand, float overlapFraction = 0.0);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
450
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
451 // return true if at least one __reciprocal__ overlap was found. otherwise, return false.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
452 bool FindOneOrMoreReciprocalOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
453 bool sameStrand, bool diffStrand, float overlapFraction = 0.0);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
454
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
455 // Given a chrom, start, end and strand for a single feature,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
456 // increment the number of hits for each feature in B file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
457 // that the feature overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
458 void countHits(const BED &a, bool sameStrand = false, bool diffStrand = false, bool countsOnly = false);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
459
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
460 // same as above, but has special logic that processes a set of
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
461 // BED "blocks" from a single entry so as to avoid over-counting
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
462 // each "block" of a single BAM/BED12 as distinct coverage. That is,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
463 // if one read has four blocks, we only want to count the coverage as
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
464 // coming from one read, not four.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
465 void countSplitHits(const vector<BED> &bedBlock, bool sameStrand = false, bool diffStrand = false, bool countsOnly = false);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
466
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
467 // Given a chrom, start, end and strand for a single feature,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
468 // increment the number of hits for each feature in B file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
469 // that the feature overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
470 void countListHits(const BED &a, int index, bool sameStrand, bool diffStrand);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
471
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
472 // the bedfile with which this instance is associated
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
473 string bedFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
474 unsigned int bedType; // 3-6, 12 for BED
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
475 // 9 for GFF
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
476 bool isZeroBased;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
477
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
478 // Main data structures used by BEDTools
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
479 masterBedCovMap bedCovMap;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
480 masterBedCovListMap bedCovListMap;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
481 masterBedMap bedMap;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
482 masterBedMapNoBin bedMapNoBin;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
483
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
484 private:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
485
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
486 // data
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
487 bool _isGff;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
488 bool _isVcf;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
489 bool _typeIsKnown; // do we know the type? (i.e., BED, GFF, VCF)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
490 FileType _fileType; // what is the file type? (BED? GFF? VCF?)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
491 istream *_bedStream;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
492 string _bedLine;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
493 vector<string> _bedFields;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
494 int _merged_start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
495 int _merged_end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
496 string _merged_chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
497 int _prev_start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
498 string _prev_chrom;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
499
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
500 void setZeroBased(bool zeroBased);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
501 void setGff (bool isGff);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
502 void setVcf (bool isVcf);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
503 void setFileType (FileType type);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
504 void setBedType (int colNums);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
505
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
506 /******************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
507 Private definitions to circumvent linker issues with
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
508 templated member functions.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
509 *******************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
510
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
511 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
512 parseLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
513 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
514 template <typename T>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
515 inline BedLineStatus parseLine (T &bed, const vector<string> &lineVector, int &lineNum) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
516
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
517 //char *p2End, *p3End, *p4End, *p5End;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
518 //long l2, l3, l4, l5;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
519 unsigned int numFields = lineVector.size();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
520
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
521 // bail out if we have a blank line
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
522 if (numFields == 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
523 return BED_BLANK;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
524 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
525
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
526 if ((lineVector[0].find("track") == string::npos) && (lineVector[0].find("browser") == string::npos) && (lineVector[0].find("#") == string::npos) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
527
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
528 if (numFields >= 3) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
529 // line parsing for all lines after the first non-header line
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
530 if (_typeIsKnown == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
531 switch(_fileType) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
532 case BED_FILETYPE:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
533 if (parseBedLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
534 case VCF_FILETYPE:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
535 if (parseVcfLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
536 case GFF_FILETYPE:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
537 if (parseGffLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
538 default:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
539 printf("ERROR: file type encountered. Exiting\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
540 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
541 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
542 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
543 // line parsing for first non-header line: figure out file contents
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
544 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
545 // it's BED format if columns 2 and 3 are integers
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
546 if (isInteger(lineVector[1]) && isInteger(lineVector[2])) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
547 setGff(false);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
548 setZeroBased(true);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
549 setFileType(BED_FILETYPE);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
550 setBedType(numFields); // we now expect numFields columns in each line
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
551 if (parseBedLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
552 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
553 // it's VCF, assuming the second column is numeric and there are at least 8 fields.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
554 else if (isInteger(lineVector[1]) && numFields >= 8) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
555 setGff(false);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
556 setVcf(true);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
557 setZeroBased(false);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
558 setFileType(VCF_FILETYPE);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
559 setBedType(numFields); // we now expect numFields columns in each line
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
560 if (parseVcfLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
561 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
562 // it's GFF, assuming columns columns 4 and 5 are numeric and we have 9 fields total.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
563 else if ((numFields >= 8) && isInteger(lineVector[3]) && isInteger(lineVector[4])) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
564 setGff(true);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
565 setZeroBased(false);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
566 setFileType(GFF_FILETYPE);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
567 setBedType(numFields); // we now expect numFields columns in each line
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
568 if (parseGffLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
569 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
570 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
571 cerr << "Unexpected file format. Please use tab-delimited BED, GFF, or VCF. " <<
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
572 "Perhaps you have non-integer starts or ends at line " << lineNum << "?" << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
573 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
574 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
575 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
576 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
577 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
578 cerr << "It looks as though you have less than 3 columns at line: " << lineNum << ". Are you sure your files are tab-delimited?" << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
579 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
580 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
581 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
582 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
583 lineNum--;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
584 return BED_HEADER;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
585 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
586 // default
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
587 return BED_INVALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
588 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
589
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
590
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
591 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
592 parseBedLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
593 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
594 template <typename T>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
595 inline bool parseBedLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
596
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
597 // process as long as the number of fields in this
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
598 // line matches what we expect for this file.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
599 if (numFields == this->bedType) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
600 bed.chrom = lineVector[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
601 int i;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
602 i = atoi(lineVector[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
603 if (i<0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
604 cerr << "Error: malformed BED entry at line " << lineNum << ". Start Coordinate detected that is < 0. Exiting." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
605 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
606 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
607 bed.start = (CHRPOS)i;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
608 i = atoi(lineVector[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
609 if (i<0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
610 cerr << "Error: malformed BED entry at line " << lineNum << ". End Coordinate detected that is < 0. Exiting." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
611 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
612 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
613 bed.end = (CHRPOS)i;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
614
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
615 // handle starts == end (e.g., insertions in reference genome)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
616 if (bed.start == bed.end) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
617 bed.start--;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
618 bed.end++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
619 bed.zeroLength = true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
620 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
621
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
622 if (this->bedType == 4) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
623 bed.name = lineVector[3];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
624 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
625 else if (this->bedType == 5) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
626 bed.name = lineVector[3];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
627 bed.score = lineVector[4];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
628 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
629 else if (this->bedType == 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
630 bed.name = lineVector[3];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
631 bed.score = lineVector[4];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
632 bed.strand = lineVector[5];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
633 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
634 else if (this->bedType > 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
635 bed.name = lineVector[3];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
636 bed.score = lineVector[4];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
637 bed.strand = lineVector[5];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
638 for (unsigned int i = 6; i < lineVector.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
639 bed.otherFields.push_back(lineVector[i]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
640 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
641 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
642 else if (this->bedType != 3) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
643 cerr << "Error: unexpected number of fields at line: " << lineNum
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
644 << ". Verify that your files are TAB-delimited. Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
645 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
646 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
647
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
648 // sanity checks.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
649 if (bed.start <= bed.end) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
650 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
651 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
652 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
653 cerr << "Error: malformed BED entry at line " << lineNum << ". Start was greater than end. Exiting." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
654 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
655 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
656 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
657 else if (numFields == 1) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
658 cerr << "Only one BED field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
659 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
660 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
661 else if ((numFields != this->bedType) && (numFields != 0)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
662 cerr << "Differing number of BED fields encountered at line: " << lineNum << ". Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
663 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
664 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
665 else if ((numFields < 3) && (numFields != 0)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
666 cerr << "TAB delimited BED file with at least 3 fields (chrom, start, end) is required at line: "<< lineNum << ". Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
667 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
668 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
669 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
670 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
671
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
672
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
673 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
674 parseVcfLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
675 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
676 template <typename T>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
677 inline bool parseVcfLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
678 if (numFields == this->bedType) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
679 bed.chrom = lineVector[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
680 bed.start = atoi(lineVector[1].c_str()) - 1; // VCF is one-based
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
681 bed.end = bed.start + lineVector[3].size(); // VCF 4.0 stores the size of the affected REF allele.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
682 bed.strand = "+";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
683 // construct the name from the ref and alt alleles.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
684 // if it's an annotated variant, add the rsId as well.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
685 bed.name = lineVector[3] + "/" + lineVector[4];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
686 if (lineVector[2] != ".") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
687 bed.name += "_" + lineVector[2];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
688 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
689
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
690 if (this->bedType > 2) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
691 for (unsigned int i = 2; i < numFields; ++i)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
692 bed.otherFields.push_back(lineVector[i]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
693 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
694
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
695 if ((bed.start <= bed.end) && (bed.start >= 0) && (bed.end >= 0)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
696 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
697 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
698 else if (bed.start > bed.end) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
699 cerr << "Error: malformed VCF entry at line " << lineNum << ". Start was greater than end. Exiting." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
700 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
701 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
702 else if ( (bed.start < 0) || (bed.end < 0) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
703 cerr << "Error: malformed VCF entry at line " << lineNum << ". Coordinate detected that is < 0. Exiting." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
704 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
705 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
706 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
707 else if (numFields == 1) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
708 cerr << "Only one VCF field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
709 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
710 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
711 else if ((numFields != this->bedType) && (numFields != 0)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
712 cerr << "Differing number of VCF fields encountered at line: " << lineNum << ". Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
713 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
714 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
715 else if ((numFields < 2) && (numFields != 0)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
716 cerr << "TAB delimited VCF file with at least 2 fields (chrom, pos) is required at line: "<< lineNum << ". Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
717 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
718 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
719 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
720 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
721
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
722
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
723
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
724 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
725 parseGffLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
726 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
727 template <typename T>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
728 inline bool parseGffLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
729 if (numFields == this->bedType) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
730 if (this->bedType >= 8 && _isGff) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
731 bed.chrom = lineVector[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
732 if (isInteger(lineVector[3]))
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
733 bed.start = atoi(lineVector[3].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
734 if (isInteger(lineVector[4]))
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
735 bed.end = atoi(lineVector[4].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
736 bed.name = lineVector[2];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
737 bed.score = lineVector[5];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
738 bed.strand = lineVector[6].c_str();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
739 bed.otherFields.push_back(lineVector[1]); // add GFF "source". unused in BED
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
740 bed.otherFields.push_back(lineVector[7]); // add GFF "fname". unused in BED
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
741 // handle the optional 9th field.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
742 if (this->bedType == 9)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
743 bed.otherFields.push_back(lineVector[8]); // add GFF "group". unused in BED
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
744 bed.start--;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
745 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
746 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
747 cerr << "Error: unexpected number of fields at line: " << lineNum <<
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
748 ". Verify that your files are TAB-delimited and that your GFF file has 8 or 9 fields. Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
749 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
750 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
751 if (bed.start > bed.end) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
752 cerr << "Error: malformed GFF entry at line " << lineNum << ". Start was greater than end. Exiting." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
753 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
754 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
755 if ( (bed.start < 0) || (bed.end < 0) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
756 cerr << "Error: malformed GFF entry at line " << lineNum << ". Coordinate detected that is < 1. Exiting." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
757 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
758 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
759 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
760 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
761 else if (numFields == 1) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
762 cerr << "Only one GFF field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
763 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
764 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
765 else if ((numFields != this->bedType) && (numFields != 0)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
766 cerr << "Differing number of GFF fields encountered at line: " << lineNum << ". Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
767 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
768 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
769 else if ((numFields < 8) && (numFields != 0)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
770 cerr << "TAB delimited GFF file with 8 or 9 fields is required at line: "<< lineNum << ". Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
771 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
772 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
773 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
774 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
775
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
776
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
777 public:
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
778
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
779 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
780 reportBedTab
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
781
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
782 Writes the _original_ BED entry with a TAB
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
783 at the end of the line.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
784 Works for BED3 - BED6.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
785 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
786 template <typename T>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
787 inline void reportBedTab(const T &bed) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
788
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
789 // if it is azeroLength feature, we need to
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
790 // correct the start and end coords to what they were
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
791 // in the original file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
792 CHRPOS start = bed.start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
793 CHRPOS end = bed.end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
794 if (bed.zeroLength) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
795 if (_isGff == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
796 start++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
797 end--;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
798 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
799
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
800 // BED
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
801 if (_isGff == false && _isVcf == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
802 if (this->bedType == 3) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
803 printf ("%s\t%d\t%d\t", bed.chrom.c_str(), start, end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
804 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
805 else if (this->bedType == 4) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
806 printf ("%s\t%d\t%d\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
807 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
808 else if (this->bedType == 5) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
809 printf ("%s\t%d\t%d\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
810 bed.score.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
811 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
812 else if (this->bedType == 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
813 printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
814 bed.score.c_str(), bed.strand.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
815 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
816 else if (this->bedType > 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
817 printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
818 bed.score.c_str(), bed.strand.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
819
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
820 vector<string>::const_iterator othIt = bed.otherFields.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
821 vector<string>::const_iterator othEnd = bed.otherFields.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
822 for ( ; othIt != othEnd; ++othIt) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
823 printf("%s\t", othIt->c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
824 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
825 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
826 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
827 // VCF
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
828 else if (_isGff == false && _isVcf == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
829 printf ("%s\t%d\t", bed.chrom.c_str(), start+1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
830
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
831 vector<string>::const_iterator othIt = bed.otherFields.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
832 vector<string>::const_iterator othEnd = bed.otherFields.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
833 for ( ; othIt != othEnd; ++othIt) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
834 printf("%s\t", othIt->c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
835 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
836 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
837 // GFF
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
838 else if (_isGff == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
839 // "GFF-8"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
840 if (this->bedType == 8) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
841 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
842 bed.name.c_str(), start+1, end,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
843 bed.score.c_str(), bed.strand.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
844 bed.otherFields[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
845 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
846 // "GFF-9"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
847 else if (this->bedType == 9) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
848 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
849 bed.name.c_str(), start+1, end,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
850 bed.score.c_str(), bed.strand.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
851 bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
852 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
853 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
854 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
855
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
856
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
857
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
858 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
859 reportBedNewLine
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
860
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
861 Writes the _original_ BED entry with a NEWLINE
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
862 at the end of the line.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
863 Works for BED3 - BED6.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
864 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
865 template <typename T>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
866 inline void reportBedNewLine(const T &bed) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
867
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
868 // if it is azeroLength feature, we need to
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
869 // correct the start and end coords to what they were
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
870 // in the original file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
871 CHRPOS start = bed.start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
872 CHRPOS end = bed.end;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
873 if (bed.zeroLength) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
874 if (_isGff == false)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
875 start++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
876 end--;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
877 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
878
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
879 //BED
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
880 if (_isGff == false && _isVcf == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
881 if (this->bedType == 3) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
882 printf ("%s\t%d\t%d\n", bed.chrom.c_str(), start, end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
883 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
884 else if (this->bedType == 4) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
885 printf ("%s\t%d\t%d\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
886 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
887 else if (this->bedType == 5) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
888 printf ("%s\t%d\t%d\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
889 bed.score.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
890 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
891 else if (this->bedType == 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
892 printf ("%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
893 bed.score.c_str(), bed.strand.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
894 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
895 else if (this->bedType > 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
896 printf ("%s\t%d\t%d\t%s\t%s\t%s", bed.chrom.c_str(), start, end, bed.name.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
897 bed.score.c_str(), bed.strand.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
898
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
899 vector<string>::const_iterator othIt = bed.otherFields.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
900 vector<string>::const_iterator othEnd = bed.otherFields.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
901 for ( ; othIt != othEnd; ++othIt) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
902 printf("\t%s", othIt->c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
903 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
904 printf("\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
905 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
906 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
907 // VCF
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
908 else if (_isGff == false && _isVcf == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
909 printf ("%s\t%d\t", bed.chrom.c_str(), start+1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
910
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
911 vector<string>::const_iterator othIt = bed.otherFields.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
912 vector<string>::const_iterator othEnd = bed.otherFields.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
913 for ( ; othIt != othEnd; ++othIt) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
914 printf("%s\t", othIt->c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
915 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
916 printf("\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
917 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
918 // GFF
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
919 else if (_isGff == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
920 // "GFF-8"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
921 if (this->bedType == 8) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
922 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
923 bed.name.c_str(), start+1, end,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
924 bed.score.c_str(), bed.strand.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
925 bed.otherFields[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
926 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
927 // "GFF-9"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
928 else if (this->bedType == 9) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
929 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
930 bed.name.c_str(), start+1, end,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
931 bed.score.c_str(), bed.strand.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
932 bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
933 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
934 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
935 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
936
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
937
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
938
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
939 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
940 reportBedRangeNewLine
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
941
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
942 Writes a custom start->end for a BED entry
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
943 with a NEWLINE at the end of the line.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
944
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
945 Works for BED3 - BED6.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
946 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
947 template <typename T>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
948 inline void reportBedRangeTab(const T &bed, CHRPOS start, CHRPOS end) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
949
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
950 // if it is azeroLength feature, we need to
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
951 // correct the start and end coords to what they were
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
952 // in the original file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
953 if (bed.zeroLength) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
954 start = bed.start + 1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
955 end = bed.end - 1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
956 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
957 // BED
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
958 if (_isGff == false && _isVcf == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
959 if (this->bedType == 3) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
960 printf ("%s\t%d\t%d\t", bed.chrom.c_str(), start, end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
961 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
962 else if (this->bedType == 4) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
963 printf ("%s\t%d\t%d\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
964 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
965 else if (this->bedType == 5) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
966 printf ("%s\t%d\t%d\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
967 bed.score.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
968 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
969 else if (this->bedType == 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
970 printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
971 bed.score.c_str(), bed.strand.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
972 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
973 else if (this->bedType > 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
974 printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
975 bed.score.c_str(), bed.strand.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
976
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
977 vector<string>::const_iterator othIt = bed.otherFields.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
978 vector<string>::const_iterator othEnd = bed.otherFields.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
979 for ( ; othIt != othEnd; ++othIt) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
980 printf("%s\t", othIt->c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
981 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
982 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
983 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
984 // VCF
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
985 else if (_isGff == false && _isVcf == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
986 printf ("%s\t%d\t", bed.chrom.c_str(), bed.start+1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
987
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
988 vector<string>::const_iterator othIt = bed.otherFields.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
989 vector<string>::const_iterator othEnd = bed.otherFields.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
990 for ( ; othIt != othEnd; ++othIt) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
991 printf("%s\t", othIt->c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
992 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
993 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
994 // GFF
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
995 else if (_isGff == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
996 // "GFF-8"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
997 if (this->bedType == 8) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
998 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
999 bed.name.c_str(), start+1, end,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1000 bed.score.c_str(), bed.strand.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1001 bed.otherFields[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1002 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1003 // "GFF-9"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1004 else if (this->bedType == 9) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1005 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1006 bed.name.c_str(), start+1, end,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1007 bed.score.c_str(), bed.strand.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1008 bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1009 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1010 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1011 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1012
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1013
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1014
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1015 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1016 reportBedRangeNewLine
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1017
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1018 Writes a custom start->end for a BED entry
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1019 with a NEWLINE at the end of the line.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1020
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1021 Works for BED3 - BED6.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1022 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1023 template <typename T>
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1024 inline void reportBedRangeNewLine(const T &bed, CHRPOS start, CHRPOS end) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1025
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1026 // if it is a zeroLength feature, we need to
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1027 // correct the start and end coords to what they were
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1028 // in the original file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1029 if (bed.zeroLength) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1030 start = bed.start + 1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1031 end = bed.end - 1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1032 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1033 // BED
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1034 if (_isGff == false && _isVcf == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1035 if (this->bedType == 3) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1036 printf ("%s\t%d\t%d\n", bed.chrom.c_str(), start, end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1037 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1038 else if (this->bedType == 4) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1039 printf ("%s\t%d\t%d\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1040 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1041 else if (this->bedType == 5) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1042 printf ("%s\t%d\t%d\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1043 bed.score.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1044 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1045 else if (this->bedType == 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1046 printf ("%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1047 bed.score.c_str(), bed.strand.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1048 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1049 else if (this->bedType > 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1050 printf ("%s\t%d\t%d\t%s\t%s\t%s", bed.chrom.c_str(), start, end, bed.name.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1051 bed.score.c_str(), bed.strand.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1052
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1053 vector<string>::const_iterator othIt = bed.otherFields.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1054 vector<string>::const_iterator othEnd = bed.otherFields.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1055 for ( ; othIt != othEnd; ++othIt) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1056 printf("\t%s", othIt->c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1057 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1058 printf("\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1059 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1060 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1061 // VCF
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1062 else if (_isGff == false && _isVcf == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1063 printf ("%s\t%d\t", bed.chrom.c_str(), bed.start+1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1064
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1065 vector<string>::const_iterator othIt = bed.otherFields.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1066 vector<string>::const_iterator othEnd = bed.otherFields.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1067 for ( ; othIt != othEnd; ++othIt) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1068 printf("%s\t", othIt->c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1069 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1070 printf("\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1071 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1072 // GFF
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1073 else if (_isGff == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1074 // "GFF-9"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1075 if (this->bedType == 8) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1076 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1077 bed.name.c_str(), start+1, end,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1078 bed.score.c_str(), bed.strand.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1079 bed.otherFields[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1080 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1081 // "GFF-8"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1082 else if (this->bedType == 9) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1083 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1084 bed.name.c_str(), start+1, end,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1085 bed.score.c_str(), bed.strand.c_str(),
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1086 bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1087 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1088 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1089 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1090
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1091
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1092 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1093 reportNullBedTab
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1094 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1095 void reportNullBedTab() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1096
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1097 if (_isGff == false && _isVcf == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1098 if (this->bedType == 3) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1099 printf (".\t-1\t-1\t");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1100 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1101 else if (this->bedType == 4) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1102 printf (".\t-1\t-1\t.\t");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1103 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1104 else if (this->bedType == 5) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1105 printf (".\t-1\t-1\t.\t-1\t");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1106 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1107 else if (this->bedType == 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1108 printf (".\t-1\t-1\t.\t-1\t.\t");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1109 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1110 else if (this->bedType > 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1111 printf (".\t-1\t-1\t.\t-1\t.\t");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1112 for (unsigned int i = 6; i < this->bedType; ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1113 printf(".\t");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1114 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1115 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1116 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1117 else if (_isGff == true && _isVcf == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1118 if (this->bedType == 8) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1119 printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1120 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1121 else if (this->bedType == 9) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1122 printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t.\t");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1123 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1124 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1125 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1126
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1127
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1128 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1129 reportNullBedNewLine
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1130 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1131 void reportNullBedNewLine() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1132
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1133 if (_isGff == false && _isVcf == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1134 if (this->bedType == 3) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1135 printf (".\t-1\t-1\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1136 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1137 else if (this->bedType == 4) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1138 printf (".\t-1\t-1\t.\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1139 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1140 else if (this->bedType == 5) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1141 printf (".\t-1\t-1\t.\t-1\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1142 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1143 else if (this->bedType == 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1144 printf (".\t-1\t-1\t.\t-1\t.\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1145 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1146 else if (this->bedType > 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1147 printf (".\t-1\t-1\t.\t-1\t.");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1148 for (unsigned int i = 6; i < this->bedType; ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1149 printf("\t.");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1150 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1151 printf("\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1152 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1153 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1154 else if (_isGff == true && _isVcf == false) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1155 if (this->bedType == 8) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1156 printf (".\t.\t.\t-1\t-1\t-1\t.\t.\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1157 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1158 else if (this->bedType == 9) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1159 printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t.\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1160 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1161 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1162 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1163
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1164
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1165 };
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1166
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1167 #endif /* BEDFILE_H */