0
|
1 /*****************************************************************************
|
|
2 bedFile.h
|
|
3
|
|
4 (c) 2009 - Aaron Quinlan
|
|
5 Hall Laboratory
|
|
6 Department of Biochemistry and Molecular Genetics
|
|
7 University of Virginia
|
|
8 aaronquinlan@gmail.com
|
|
9
|
|
10 Licensed under the GNU General Public License 2.0 license.
|
|
11 ******************************************************************************/
|
|
12 #ifndef BEDFILE_H
|
|
13 #define BEDFILE_H
|
|
14
|
|
15 // "local" includes
|
|
16 #include "gzstream.h"
|
|
17 #include "lineFileUtilities.h"
|
|
18 #include "fileType.h"
|
|
19
|
|
20 // standard includes
|
|
21 #include <vector>
|
|
22 #include <map>
|
|
23 #include <set>
|
|
24 #include <string>
|
|
25 #include <iostream>
|
|
26 #include <fstream>
|
|
27 #include <sstream>
|
|
28 #include <cstring>
|
|
29 #include <algorithm>
|
|
30 #include <limits.h>
|
|
31 #include <stdint.h>
|
|
32 #include <cstdio>
|
|
33 //#include <tr1/unordered_map> // Experimental.
|
|
34 using namespace std;
|
|
35
|
|
36
|
|
37 //*************************************************
|
|
38 // Data type tydedef
|
|
39 //*************************************************
|
|
40 typedef uint32_t CHRPOS;
|
|
41 typedef uint16_t BINLEVEL;
|
|
42 typedef uint32_t BIN;
|
|
43 typedef uint16_t USHORT;
|
|
44 typedef uint32_t UINT;
|
|
45
|
|
46 //*************************************************
|
|
47 // Genome binning constants
|
|
48 //*************************************************
|
|
49
|
|
50 const BIN _numBins = 37450;
|
|
51 const BINLEVEL _binLevels = 7;
|
|
52
|
|
53 // bins range in size from 16kb to 512Mb
|
|
54 // Bin 0 spans 512Mbp, # Level 1
|
|
55 // Bins 1-8 span 64Mbp, # Level 2
|
|
56 // Bins 9-72 span 8Mbp, # Level 3
|
|
57 // Bins 73-584 span 1Mbp # Level 4
|
|
58 // Bins 585-4680 span 128Kbp # Level 5
|
|
59 // Bins 4681-37449 span 16Kbp # Level 6
|
|
60 const BIN _binOffsetsExtended[] = {32678+4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};
|
|
61 //const BIN _binOffsetsExtended[] = {4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};
|
|
62
|
|
63 const USHORT _binFirstShift = 14; /* How much to shift to get to finest bin. */
|
|
64 const USHORT _binNextShift = 3; /* How much to shift to get to next larger bin. */
|
|
65
|
|
66
|
|
67 //*************************************************
|
|
68 // Common data structures
|
|
69 //*************************************************
|
|
70
|
|
71 struct DEPTH {
|
|
72 UINT starts;
|
|
73 UINT ends;
|
|
74 };
|
|
75
|
|
76
|
|
77 /*
|
|
78 Structure for regular BED records
|
|
79 */
|
|
80 struct BED {
|
|
81
|
|
82 // Regular BED fields
|
|
83 string chrom;
|
|
84 CHRPOS start;
|
|
85 CHRPOS end;
|
|
86 string name;
|
|
87 string score;
|
|
88 string strand;
|
|
89
|
|
90 // Add'l fields for BED12 and/or custom BED annotations
|
|
91 vector<string> otherFields;
|
|
92
|
|
93 // experimental fields for the FJOIN approach.
|
|
94 bool zeroLength;
|
|
95 bool added;
|
|
96 bool finished;
|
|
97 // list of hits from another file.
|
|
98 vector<BED> overlaps;
|
|
99
|
|
100 public:
|
|
101 // constructors
|
|
102
|
|
103 // Null
|
|
104 BED()
|
|
105 : chrom(""),
|
|
106 start(0),
|
|
107 end(0),
|
|
108 name(""),
|
|
109 score(""),
|
|
110 strand(""),
|
|
111 otherFields(),
|
|
112 zeroLength(false),
|
|
113 added(false),
|
|
114 finished(false),
|
|
115 overlaps()
|
|
116 {}
|
|
117
|
|
118 // BED3
|
|
119 BED(string chrom, CHRPOS start, CHRPOS end)
|
|
120 : chrom(chrom),
|
|
121 start(start),
|
|
122 end(end),
|
|
123 name(""),
|
|
124 score(""),
|
|
125 strand(""),
|
|
126 otherFields(),
|
|
127 zeroLength(false),
|
|
128 added(false),
|
|
129 finished(false),
|
|
130 overlaps()
|
|
131 {}
|
|
132
|
|
133 // BED4
|
|
134 BED(string chrom, CHRPOS start, CHRPOS end, string strand)
|
|
135 : chrom(chrom),
|
|
136 start(start),
|
|
137 end(end),
|
|
138 name(""),
|
|
139 score(""),
|
|
140 strand(strand),
|
|
141 otherFields(),
|
|
142 zeroLength(false),
|
|
143 added(false),
|
|
144 finished(false),
|
|
145 overlaps()
|
|
146 {}
|
|
147
|
|
148 // BED6
|
|
149 BED(string chrom, CHRPOS start, CHRPOS end, string name,
|
|
150 string score, string strand)
|
|
151 : chrom(chrom),
|
|
152 start(start),
|
|
153 end(end),
|
|
154 name(name),
|
|
155 score(score),
|
|
156 strand(strand),
|
|
157 otherFields(),
|
|
158 zeroLength(false),
|
|
159 added(false),
|
|
160 finished(false),
|
|
161 overlaps()
|
|
162 {}
|
|
163
|
|
164 // BEDALL
|
|
165 BED(string chrom, CHRPOS start, CHRPOS end, string name,
|
|
166 string score, string strand, vector<string> otherFields)
|
|
167 : chrom(chrom),
|
|
168 start(start),
|
|
169 end(end),
|
|
170 name(name),
|
|
171 score(score),
|
|
172 strand(strand),
|
|
173 otherFields(otherFields),
|
|
174 zeroLength(false),
|
|
175 added(false),
|
|
176 finished(false),
|
|
177 overlaps()
|
|
178 {}
|
|
179
|
|
180 int size() {
|
|
181 return end-start;
|
|
182 }
|
|
183
|
|
184 }; // BED
|
|
185
|
|
186
|
|
187 /*
|
|
188 Structure for each end of a paired BED record
|
|
189 mate points to the other end.
|
|
190 */
|
|
191 struct MATE {
|
|
192 BED bed;
|
|
193 int lineNum;
|
|
194 MATE *mate;
|
|
195 };
|
|
196
|
|
197
|
|
198 /*
|
|
199 Structure for regular BED COVERAGE records
|
|
200 */
|
|
201 struct BEDCOV {
|
|
202
|
|
203 string chrom;
|
|
204
|
|
205 // Regular BED fields
|
|
206 CHRPOS start;
|
|
207 CHRPOS end;
|
|
208 string name;
|
|
209 string score;
|
|
210 string strand;
|
|
211
|
|
212 // Add'l fields for BED12 and/or custom BED annotations
|
|
213 vector<string> otherFields;
|
|
214
|
|
215 // flag a zero-length feature
|
|
216 bool zeroLength;
|
|
217
|
|
218 // Additional fields specific to computing coverage
|
|
219 map<unsigned int, DEPTH> depthMap;
|
|
220 unsigned int count;
|
|
221 CHRPOS minOverlapStart;
|
|
222
|
|
223
|
|
224 public:
|
|
225 // constructors
|
|
226 // Null
|
|
227 BEDCOV()
|
|
228 : chrom(""),
|
|
229 start(0),
|
|
230 end(0),
|
|
231 name(""),
|
|
232 score(""),
|
|
233 strand(""),
|
|
234 otherFields(),
|
|
235 zeroLength(false),
|
|
236 depthMap(),
|
|
237 count(0),
|
|
238 minOverlapStart(0)
|
|
239 {}
|
|
240 };
|
|
241
|
|
242
|
|
243 /*
|
|
244 Structure for BED COVERAGE records having lists of
|
|
245 multiple coverages
|
|
246 */
|
|
247 struct BEDCOVLIST {
|
|
248
|
|
249 // Regular BED fields
|
|
250 string chrom;
|
|
251 CHRPOS start;
|
|
252 CHRPOS end;
|
|
253 string name;
|
|
254 string score;
|
|
255 string strand;
|
|
256
|
|
257 // Add'l fields for BED12 and/or custom BED annotations
|
|
258 vector<string> otherFields;
|
|
259
|
|
260 // flag a zero-length feature
|
|
261 bool zeroLength;
|
|
262
|
|
263 // Additional fields specific to computing coverage
|
|
264 vector< map<unsigned int, DEPTH> > depthMapList;
|
|
265 vector<unsigned int> counts;
|
|
266 vector<CHRPOS> minOverlapStarts;
|
|
267
|
|
268
|
|
269 public:
|
|
270 // constructors
|
|
271 // Null
|
|
272 BEDCOVLIST()
|
|
273 : chrom(""),
|
|
274 start(0),
|
|
275 end(0),
|
|
276 name(""),
|
|
277 score(""),
|
|
278 strand(""),
|
|
279 otherFields(),
|
|
280 zeroLength(false),
|
|
281 depthMapList(),
|
|
282 counts(0),
|
|
283 minOverlapStarts(0)
|
|
284 {}
|
|
285 };
|
|
286
|
|
287
|
|
288 // enum to flag the state of a given line in a BED file.
|
|
289 enum BedLineStatus
|
|
290 {
|
|
291 BED_INVALID = -1,
|
|
292 BED_HEADER = 0,
|
|
293 BED_BLANK = 1,
|
|
294 BED_VALID = 2
|
|
295 };
|
|
296
|
|
297 // enum to indicate the type of file we are dealing with
|
|
298 enum FileType
|
|
299 {
|
|
300 BED_FILETYPE,
|
|
301 GFF_FILETYPE,
|
|
302 VCF_FILETYPE
|
|
303 };
|
|
304
|
|
305 //*************************************************
|
|
306 // Data structure typedefs
|
|
307 //*************************************************
|
|
308 typedef vector<BED> bedVector;
|
|
309 typedef vector<BEDCOV> bedCovVector;
|
|
310 typedef vector<MATE> mateVector;
|
|
311 typedef vector<BEDCOVLIST> bedCovListVector;
|
|
312
|
|
313 typedef map<BIN, bedVector, std::less<BIN> > binsToBeds;
|
|
314 typedef map<BIN, bedCovVector, std::less<BIN> > binsToBedCovs;
|
|
315 typedef map<BIN, mateVector, std::less<BIN> > binsToMates;
|
|
316 typedef map<BIN, bedCovListVector, std::less<BIN> > binsToBedCovLists;
|
|
317
|
|
318 typedef map<string, binsToBeds, std::less<string> > masterBedMap;
|
|
319 typedef map<string, binsToBedCovs, std::less<string> > masterBedCovMap;
|
|
320 typedef map<string, binsToMates, std::less<string> > masterMateMap;
|
|
321 typedef map<string, binsToBedCovLists, std::less<string> > masterBedCovListMap;
|
|
322 typedef map<string, bedVector, std::less<string> > masterBedMapNoBin;
|
|
323
|
|
324
|
|
325 // EXPERIMENTAL - wait for TR1
|
|
326 // typedef vector<BED> bedVector;
|
|
327 // typedef vector<BEDCOV> bedCovVector;
|
|
328 //
|
|
329 // typedef tr1::unordered_map<BIN, bedVector> binsToBeds;
|
|
330 // typedef tr1::unordered_map<BIN, bedCovVector> binsToBedCovs;
|
|
331 //
|
|
332 // typedef tr1::unordered_map<string, binsToBeds> masterBedMap;
|
|
333 // typedef tr1::unordered_map<string, binsToBedCovs> masterBedCovMap;
|
|
334 // typedef tr1::unordered_map<string, bedVector> masterBedMapNoBin;
|
|
335
|
|
336
|
|
337
|
|
338 // return the genome "bin" for a feature with this start and end
|
|
339 inline
|
|
340 BIN getBin(CHRPOS start, CHRPOS end) {
|
|
341 --end;
|
|
342 start >>= _binFirstShift;
|
|
343 end >>= _binFirstShift;
|
|
344
|
|
345 for (register short i = 0; i < _binLevels; ++i) {
|
|
346 if (start == end) return _binOffsetsExtended[i] + start;
|
|
347 start >>= _binNextShift;
|
|
348 end >>= _binNextShift;
|
|
349 }
|
|
350 cerr << "start " << start << ", end " << end << " out of range in findBin (max is 512M)" << endl;
|
|
351 return 0;
|
|
352 }
|
|
353
|
|
354 /****************************************************
|
|
355 // isInteger(s): Tests if string s is a valid integer
|
|
356 *****************************************************/
|
|
357 inline bool isInteger(const std::string& s) {
|
|
358 int len = s.length();
|
|
359 for (int i = 0; i < len; i++) {
|
|
360 if (!std::isdigit(s[i])) return false;
|
|
361 }
|
|
362 return true;
|
|
363 }
|
|
364
|
|
365
|
|
366 // return the amount of overlap between two features. Negative if none and the the
|
|
367 // number of negative bases is the distance between the two.
|
|
368 inline
|
|
369 int overlaps(CHRPOS aS, CHRPOS aE, CHRPOS bS, CHRPOS bE) {
|
|
370 return min(aE, bE) - max(aS, bS);
|
|
371 }
|
|
372
|
|
373 // is A after (to the right of) B?
|
|
374 inline
|
|
375 bool after(const BED &a, const BED &b) {
|
|
376 return (a.start >= b.end);
|
|
377 }
|
|
378
|
|
379
|
|
380 // Ancillary functions
|
|
381 void splitBedIntoBlocks(const BED &bed, int lineNum, bedVector &bedBlocks);
|
|
382
|
|
383
|
|
384 // BED Sorting Methods
|
|
385 bool sortByChrom(const BED &a, const BED &b);
|
|
386 bool sortByStart(const BED &a, const BED &b);
|
|
387 bool sortBySizeAsc(const BED &a, const BED &b);
|
|
388 bool sortBySizeDesc(const BED &a, const BED &b);
|
|
389 bool sortByScoreAsc(const BED &a, const BED &b);
|
|
390 bool sortByScoreDesc(const BED &a, const BED &b);
|
|
391 bool byChromThenStart(BED const &a, BED const &b);
|
|
392
|
|
393
|
|
394
|
|
395 //************************************************
|
|
396 // BedFile Class methods and elements
|
|
397 //************************************************
|
|
398 class BedFile {
|
|
399
|
|
400 public:
|
|
401
|
|
402 // Constructor
|
|
403 BedFile(string &);
|
|
404
|
|
405 // Destructor
|
|
406 ~BedFile(void);
|
|
407
|
|
408 // Open a BED file for reading (creates an istream pointer)
|
|
409 void Open(void);
|
|
410
|
|
411 // Rewind the pointer back to the beginning of the file
|
|
412 void Rewind(void);
|
|
413
|
|
414 // Jump to a specific byte in the file
|
|
415 void Seek(unsigned long offset);
|
|
416
|
|
417 bool Empty();
|
|
418
|
|
419 // Close an opened BED file.
|
|
420 void Close(void);
|
|
421
|
|
422 // Get the next BED entry in an opened BED file.
|
|
423 BedLineStatus GetNextBed (BED &bed, int &lineNum, bool forceSorted = false);
|
|
424
|
|
425 // Returns the next MERGED (i.e., non-overlapping) interval in an opened BED file
|
|
426 // NOTE: assumes input file is sorted by chrom then start
|
|
427 bool GetNextMergedBed(BED &merged_bed, int &lineNum);
|
|
428
|
|
429 // load a BED file into a map keyed by chrom, then bin. value is vector of BEDs
|
|
430 void loadBedFileIntoMap();
|
|
431
|
|
432 // load a BED file into a map keyed by chrom, then bin. value is vector of BEDCOVs
|
|
433 void loadBedCovFileIntoMap();
|
|
434
|
|
435 // load a BED file into a map keyed by chrom, then bin. value is vector of BEDCOVLISTs
|
|
436 void loadBedCovListFileIntoMap();
|
|
437
|
|
438 // load a BED file into a map keyed by chrom. value is vector of BEDs
|
|
439 void loadBedFileIntoMapNoBin();
|
|
440
|
|
441 // Given a chrom, start, end and strand for a single feature,
|
|
442 // search for all overlapping features in another BED file.
|
|
443 // Searches through each relevant genome bin on the same chromosome
|
|
444 // as the single feature. Note: Adapted from kent source "binKeeperFind"
|
|
445 void FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, vector<BED> &hits, bool sameStrand, bool diffStrand);
|
|
446
|
|
447 // return true if at least one overlap was found. otherwise, return false.
|
|
448 bool FindOneOrMoreOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand,
|
|
449 bool sameStrand, bool diffStrand, float overlapFraction = 0.0);
|
|
450
|
|
451 // return true if at least one __reciprocal__ overlap was found. otherwise, return false.
|
|
452 bool FindOneOrMoreReciprocalOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand,
|
|
453 bool sameStrand, bool diffStrand, float overlapFraction = 0.0);
|
|
454
|
|
455 // Given a chrom, start, end and strand for a single feature,
|
|
456 // increment a the number of hits for each feature in B file
|
|
457 // that the feature overlaps
|
|
458 void countHits(const BED &a, bool sameStrand = false, bool diffStrand = false, bool countsOnly = false);
|
|
459
|
|
460 // same as above, but has special logic that processes a set of
|
|
461 // BED "blocks" from a single entry so as to avoid over-counting
|
|
462 // each "block" of a single BAM/BED12 as distinct coverage. That is,
|
|
463 // if one read has four block, we only want to count the coverage as
|
|
464 // coming from one read, not four.
|
|
465 void countSplitHits(const vector<BED> &bedBlock, bool sameStrand = false, bool diffStrand = false, bool countsOnly = false);
|
|
466
|
|
467 // Given a chrom, start, end and strand for a single feature,
|
|
468 // increment a the number of hits for each feature in B file
|
|
469 // that the feature overlaps
|
|
470 void countListHits(const BED &a, int index, bool sameStrand, bool diffStrand);
|
|
471
|
|
472 // the bedfile with which this instance is associated
|
|
473 string bedFile;
|
|
474 unsigned int bedType; // 3-6, 12 for BED
|
|
475 // 9 for GFF
|
|
476 bool isZeroBased;
|
|
477
|
|
478 // Main data structires used by BEDTools
|
|
479 masterBedCovMap bedCovMap;
|
|
480 masterBedCovListMap bedCovListMap;
|
|
481 masterBedMap bedMap;
|
|
482 masterBedMapNoBin bedMapNoBin;
|
|
483
|
|
484 private:
|
|
485
|
|
486 // data
|
|
487 bool _isGff;
|
|
488 bool _isVcf;
|
|
489 bool _typeIsKnown; // do we know the type? (i.e., BED, GFF, VCF)
|
|
490 FileType _fileType; // what is the file type? (BED? GFF? VCF?)
|
|
491 istream *_bedStream;
|
|
492 string _bedLine;
|
|
493 vector<string> _bedFields;
|
|
494 int _merged_start;
|
|
495 int _merged_end;
|
|
496 string _merged_chrom;
|
|
497 int _prev_start;
|
|
498 string _prev_chrom;
|
|
499
|
|
500 void setZeroBased(bool zeroBased);
|
|
501 void setGff (bool isGff);
|
|
502 void setVcf (bool isVcf);
|
|
503 void setFileType (FileType type);
|
|
504 void setBedType (int colNums);
|
|
505
|
|
506 /******************************************************
|
|
507 Private definitions to circumvent linker issues with
|
|
508 templated member functions.
|
|
509 *******************************************************/
|
|
510
|
|
511 /*
|
|
512 parseLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.)
|
|
513 */
|
|
514 template <typename T>
|
|
515 inline BedLineStatus parseLine (T &bed, const vector<string> &lineVector, int &lineNum) {
|
|
516
|
|
517 //char *p2End, *p3End, *p4End, *p5End;
|
|
518 //long l2, l3, l4, l5;
|
|
519 unsigned int numFields = lineVector.size();
|
|
520
|
|
521 // bail out if we have a blank line
|
|
522 if (numFields == 0) {
|
|
523 return BED_BLANK;
|
|
524 }
|
|
525
|
|
526 if ((lineVector[0].find("track") == string::npos) && (lineVector[0].find("browser") == string::npos) && (lineVector[0].find("#") == string::npos) ) {
|
|
527
|
|
528 if (numFields >= 3) {
|
|
529 // line parsing for all lines after the first non-header line
|
|
530 if (_typeIsKnown == true) {
|
|
531 switch(_fileType) {
|
|
532 case BED_FILETYPE:
|
|
533 if (parseBedLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
|
|
534 case VCF_FILETYPE:
|
|
535 if (parseVcfLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
|
|
536 case GFF_FILETYPE:
|
|
537 if (parseGffLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
|
|
538 default:
|
|
539 printf("ERROR: file type encountered. Exiting\n");
|
|
540 exit(1);
|
|
541 }
|
|
542 }
|
|
543 // line parsing for first non-header line: figure out file contents
|
|
544 else {
|
|
545 // it's BED format if columns 2 and 3 are integers
|
|
546 if (isInteger(lineVector[1]) && isInteger(lineVector[2])) {
|
|
547 setGff(false);
|
|
548 setZeroBased(true);
|
|
549 setFileType(BED_FILETYPE);
|
|
550 setBedType(numFields); // we now expect numFields columns in each line
|
|
551 if (parseBedLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
|
|
552 }
|
|
553 // it's VCF, assuming the second column is numeric and there are at least 8 fields.
|
|
554 else if (isInteger(lineVector[1]) && numFields >= 8) {
|
|
555 setGff(false);
|
|
556 setVcf(true);
|
|
557 setZeroBased(false);
|
|
558 setFileType(VCF_FILETYPE);
|
|
559 setBedType(numFields); // we now expect numFields columns in each line
|
|
560 if (parseVcfLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
|
|
561 }
|
|
562 // it's GFF, assuming columns columns 4 and 5 are numeric and we have 9 fields total.
|
|
563 else if ((numFields >= 8) && isInteger(lineVector[3]) && isInteger(lineVector[4])) {
|
|
564 setGff(true);
|
|
565 setZeroBased(false);
|
|
566 setFileType(GFF_FILETYPE);
|
|
567 setBedType(numFields); // we now expect numFields columns in each line
|
|
568 if (parseGffLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
|
|
569 }
|
|
570 else {
|
|
571 cerr << "Unexpected file format. Please use tab-delimited BED, GFF, or VCF. " <<
|
|
572 "Perhaps you have non-integer starts or ends at line " << lineNum << "?" << endl;
|
|
573 exit(1);
|
|
574 }
|
|
575 }
|
|
576 }
|
|
577 else {
|
|
578 cerr << "It looks as though you have less than 3 columns at line: " << lineNum << ". Are you sure your files are tab-delimited?" << endl;
|
|
579 exit(1);
|
|
580 }
|
|
581 }
|
|
582 else {
|
|
583 lineNum--;
|
|
584 return BED_HEADER;
|
|
585 }
|
|
586 // default
|
|
587 return BED_INVALID;
|
|
588 }
|
|
589
|
|
590
|
|
591 /*
|
|
592 parseBedLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.)
|
|
593 */
|
|
594 template <typename T>
|
|
595 inline bool parseBedLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) {
|
|
596
|
|
597 // process as long as the number of fields in this
|
|
598 // line matches what we expect for this file.
|
|
599 if (numFields == this->bedType) {
|
|
600 bed.chrom = lineVector[0];
|
|
601 int i;
|
|
602 i = atoi(lineVector[1].c_str());
|
|
603 if (i<0) {
|
|
604 cerr << "Error: malformed BED entry at line " << lineNum << ". Start Coordinate detected that is < 0. Exiting." << endl;
|
|
605 exit(1);
|
|
606 }
|
|
607 bed.start = (CHRPOS)i;
|
|
608 i = atoi(lineVector[2].c_str());
|
|
609 if (i<0) {
|
|
610 cerr << "Error: malformed BED entry at line " << lineNum << ". End Coordinate detected that is < 0. Exiting." << endl;
|
|
611 exit(1);
|
|
612 }
|
|
613 bed.end = (CHRPOS)i;
|
|
614
|
|
615 // handle starts == end (e.g., insertions in reference genome)
|
|
616 if (bed.start == bed.end) {
|
|
617 bed.start--;
|
|
618 bed.end++;
|
|
619 bed.zeroLength = true;
|
|
620 }
|
|
621
|
|
622 if (this->bedType == 4) {
|
|
623 bed.name = lineVector[3];
|
|
624 }
|
|
625 else if (this->bedType == 5) {
|
|
626 bed.name = lineVector[3];
|
|
627 bed.score = lineVector[4];
|
|
628 }
|
|
629 else if (this->bedType == 6) {
|
|
630 bed.name = lineVector[3];
|
|
631 bed.score = lineVector[4];
|
|
632 bed.strand = lineVector[5];
|
|
633 }
|
|
634 else if (this->bedType > 6) {
|
|
635 bed.name = lineVector[3];
|
|
636 bed.score = lineVector[4];
|
|
637 bed.strand = lineVector[5];
|
|
638 for (unsigned int i = 6; i < lineVector.size(); ++i) {
|
|
639 bed.otherFields.push_back(lineVector[i]);
|
|
640 }
|
|
641 }
|
|
642 else if (this->bedType != 3) {
|
|
643 cerr << "Error: unexpected number of fields at line: " << lineNum
|
|
644 << ". Verify that your files are TAB-delimited. Exiting..." << endl;
|
|
645 exit(1);
|
|
646 }
|
|
647
|
|
648 // sanity checks.
|
|
649 if (bed.start <= bed.end) {
|
|
650 return true;
|
|
651 }
|
|
652 else {
|
|
653 cerr << "Error: malformed BED entry at line " << lineNum << ". Start was greater than end. Exiting." << endl;
|
|
654 exit(1);
|
|
655 }
|
|
656 }
|
|
657 else if (numFields == 1) {
|
|
658 cerr << "Only one BED field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl;
|
|
659 exit(1);
|
|
660 }
|
|
661 else if ((numFields != this->bedType) && (numFields != 0)) {
|
|
662 cerr << "Differing number of BED fields encountered at line: " << lineNum << ". Exiting..." << endl;
|
|
663 exit(1);
|
|
664 }
|
|
665 else if ((numFields < 3) && (numFields != 0)) {
|
|
666 cerr << "TAB delimited BED file with at least 3 fields (chrom, start, end) is required at line: "<< lineNum << ". Exiting..." << endl;
|
|
667 exit(1);
|
|
668 }
|
|
669 return false;
|
|
670 }
|
|
671
|
|
672
|
|
673 /*
|
|
674 parseVcfLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.)
|
|
675 */
|
|
676 template <typename T>
|
|
677 inline bool parseVcfLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) {
|
|
678 if (numFields == this->bedType) {
|
|
679 bed.chrom = lineVector[0];
|
|
680 bed.start = atoi(lineVector[1].c_str()) - 1; // VCF is one-based
|
|
681 bed.end = bed.start + lineVector[3].size(); // VCF 4.0 stores the size of the affected REF allele.
|
|
682 bed.strand = "+";
|
|
683 // construct the name from the ref and alt alleles.
|
|
684 // if it's an annotated variant, add the rsId as well.
|
|
685 bed.name = lineVector[3] + "/" + lineVector[4];
|
|
686 if (lineVector[2] != ".") {
|
|
687 bed.name += "_" + lineVector[2];
|
|
688 }
|
|
689
|
|
690 if (this->bedType > 2) {
|
|
691 for (unsigned int i = 2; i < numFields; ++i)
|
|
692 bed.otherFields.push_back(lineVector[i]);
|
|
693 }
|
|
694
|
|
695 if ((bed.start <= bed.end) && (bed.start >= 0) && (bed.end >= 0)) {
|
|
696 return true;
|
|
697 }
|
|
698 else if (bed.start > bed.end) {
|
|
699 cerr << "Error: malformed VCF entry at line " << lineNum << ". Start was greater than end. Exiting." << endl;
|
|
700 exit(1);
|
|
701 }
|
|
702 else if ( (bed.start < 0) || (bed.end < 0) ) {
|
|
703 cerr << "Error: malformed VCF entry at line " << lineNum << ". Coordinate detected that is < 0. Exiting." << endl;
|
|
704 exit(1);
|
|
705 }
|
|
706 }
|
|
707 else if (numFields == 1) {
|
|
708 cerr << "Only one VCF field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl;
|
|
709 exit(1);
|
|
710 }
|
|
711 else if ((numFields != this->bedType) && (numFields != 0)) {
|
|
712 cerr << "Differing number of VCF fields encountered at line: " << lineNum << ". Exiting..." << endl;
|
|
713 exit(1);
|
|
714 }
|
|
715 else if ((numFields < 2) && (numFields != 0)) {
|
|
716 cerr << "TAB delimited VCF file with at least 2 fields (chrom, pos) is required at line: "<< lineNum << ". Exiting..." << endl;
|
|
717 exit(1);
|
|
718 }
|
|
719 return false;
|
|
720 }
|
|
721
|
|
722
|
|
723
|
|
724 /*
|
|
725 parseGffLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.)
|
|
726 */
|
|
727 template <typename T>
|
|
728 inline bool parseGffLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) {
|
|
729 if (numFields == this->bedType) {
|
|
730 if (this->bedType >= 8 && _isGff) {
|
|
731 bed.chrom = lineVector[0];
|
|
732 if (isInteger(lineVector[3]))
|
|
733 bed.start = atoi(lineVector[3].c_str());
|
|
734 if (isInteger(lineVector[4]))
|
|
735 bed.end = atoi(lineVector[4].c_str());
|
|
736 bed.name = lineVector[2];
|
|
737 bed.score = lineVector[5];
|
|
738 bed.strand = lineVector[6].c_str();
|
|
739 bed.otherFields.push_back(lineVector[1]); // add GFF "source". unused in BED
|
|
740 bed.otherFields.push_back(lineVector[7]); // add GFF "fname". unused in BED
|
|
741 // handle the optional 9th field.
|
|
742 if (this->bedType == 9)
|
|
743 bed.otherFields.push_back(lineVector[8]); // add GFF "group". unused in BED
|
|
744 bed.start--;
|
|
745 }
|
|
746 else {
|
|
747 cerr << "Error: unexpected number of fields at line: " << lineNum <<
|
|
748 ". Verify that your files are TAB-delimited and that your GFF file has 8 or 9 fields. Exiting..." << endl;
|
|
749 exit(1);
|
|
750 }
|
|
751 if (bed.start > bed.end) {
|
|
752 cerr << "Error: malformed GFF entry at line " << lineNum << ". Start was greater than end. Exiting." << endl;
|
|
753 exit(1);
|
|
754 }
|
|
755 if ( (bed.start < 0) || (bed.end < 0) ) {
|
|
756 cerr << "Error: malformed GFF entry at line " << lineNum << ". Coordinate detected that is < 1. Exiting." << endl;
|
|
757 exit(1);
|
|
758 }
|
|
759 return true;
|
|
760 }
|
|
761 else if (numFields == 1) {
|
|
762 cerr << "Only one GFF field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl;
|
|
763 exit(1);
|
|
764 }
|
|
765 else if ((numFields != this->bedType) && (numFields != 0)) {
|
|
766 cerr << "Differing number of GFF fields encountered at line: " << lineNum << ". Exiting..." << endl;
|
|
767 exit(1);
|
|
768 }
|
|
769 else if ((numFields < 8) && (numFields != 0)) {
|
|
770 cerr << "TAB delimited GFF file with 8 or 9 fields is required at line: "<< lineNum << ". Exiting..." << endl;
|
|
771 exit(1);
|
|
772 }
|
|
773 return false;
|
|
774 }
|
|
775
|
|
776
|
|
777 public:
|
|
778
|
|
779 /*
|
|
780 reportBedTab
|
|
781
|
|
782 Writes the _original_ BED entry with a TAB
|
|
783 at the end of the line.
|
|
784 Works for BED3 - BED6.
|
|
785 */
|
|
786 template <typename T>
|
|
787 inline void reportBedTab(const T &bed) {
|
|
788
|
|
789 // if it is azeroLength feature, we need to
|
|
790 // correct the start and end coords to what they were
|
|
791 // in the original file
|
|
792 CHRPOS start = bed.start;
|
|
793 CHRPOS end = bed.end;
|
|
794 if (bed.zeroLength) {
|
|
795 if (_isGff == false)
|
|
796 start++;
|
|
797 end--;
|
|
798 }
|
|
799
|
|
800 // BED
|
|
801 if (_isGff == false && _isVcf == false) {
|
|
802 if (this->bedType == 3) {
|
|
803 printf ("%s\t%d\t%d\t", bed.chrom.c_str(), start, end);
|
|
804 }
|
|
805 else if (this->bedType == 4) {
|
|
806 printf ("%s\t%d\t%d\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str());
|
|
807 }
|
|
808 else if (this->bedType == 5) {
|
|
809 printf ("%s\t%d\t%d\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
|
|
810 bed.score.c_str());
|
|
811 }
|
|
812 else if (this->bedType == 6) {
|
|
813 printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
|
|
814 bed.score.c_str(), bed.strand.c_str());
|
|
815 }
|
|
816 else if (this->bedType > 6) {
|
|
817 printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
|
|
818 bed.score.c_str(), bed.strand.c_str());
|
|
819
|
|
820 vector<string>::const_iterator othIt = bed.otherFields.begin();
|
|
821 vector<string>::const_iterator othEnd = bed.otherFields.end();
|
|
822 for ( ; othIt != othEnd; ++othIt) {
|
|
823 printf("%s\t", othIt->c_str());
|
|
824 }
|
|
825 }
|
|
826 }
|
|
827 // VCF
|
|
828 else if (_isGff == false && _isVcf == true) {
|
|
829 printf ("%s\t%d\t", bed.chrom.c_str(), start+1);
|
|
830
|
|
831 vector<string>::const_iterator othIt = bed.otherFields.begin();
|
|
832 vector<string>::const_iterator othEnd = bed.otherFields.end();
|
|
833 for ( ; othIt != othEnd; ++othIt) {
|
|
834 printf("%s\t", othIt->c_str());
|
|
835 }
|
|
836 }
|
|
837 // GFF
|
|
838 else if (_isGff == true) {
|
|
839 // "GFF-8"
|
|
840 if (this->bedType == 8) {
|
|
841 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
|
|
842 bed.name.c_str(), start+1, end,
|
|
843 bed.score.c_str(), bed.strand.c_str(),
|
|
844 bed.otherFields[1].c_str());
|
|
845 }
|
|
846 // "GFF-9"
|
|
847 else if (this->bedType == 9) {
|
|
848 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
|
|
849 bed.name.c_str(), start+1, end,
|
|
850 bed.score.c_str(), bed.strand.c_str(),
|
|
851 bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
|
|
852 }
|
|
853 }
|
|
854 }
|
|
855
|
|
856
|
|
857
|
|
858 /*
|
|
859 reportBedNewLine
|
|
860
|
|
861 Writes the _original_ BED entry with a NEWLINE
|
|
862 at the end of the line.
|
|
863 Works for BED3 - BED6.
|
|
864 */
|
|
865 template <typename T>
|
|
866 inline void reportBedNewLine(const T &bed) {
|
|
867
|
|
868 // if it is azeroLength feature, we need to
|
|
869 // correct the start and end coords to what they were
|
|
870 // in the original file
|
|
871 CHRPOS start = bed.start;
|
|
872 CHRPOS end = bed.end;
|
|
873 if (bed.zeroLength) {
|
|
874 if (_isGff == false)
|
|
875 start++;
|
|
876 end--;
|
|
877 }
|
|
878
|
|
879 //BED
|
|
880 if (_isGff == false && _isVcf == false) {
|
|
881 if (this->bedType == 3) {
|
|
882 printf ("%s\t%d\t%d\n", bed.chrom.c_str(), start, end);
|
|
883 }
|
|
884 else if (this->bedType == 4) {
|
|
885 printf ("%s\t%d\t%d\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str());
|
|
886 }
|
|
887 else if (this->bedType == 5) {
|
|
888 printf ("%s\t%d\t%d\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(),
|
|
889 bed.score.c_str());
|
|
890 }
|
|
891 else if (this->bedType == 6) {
|
|
892 printf ("%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(),
|
|
893 bed.score.c_str(), bed.strand.c_str());
|
|
894 }
|
|
895 else if (this->bedType > 6) {
|
|
896 printf ("%s\t%d\t%d\t%s\t%s\t%s", bed.chrom.c_str(), start, end, bed.name.c_str(),
|
|
897 bed.score.c_str(), bed.strand.c_str());
|
|
898
|
|
899 vector<string>::const_iterator othIt = bed.otherFields.begin();
|
|
900 vector<string>::const_iterator othEnd = bed.otherFields.end();
|
|
901 for ( ; othIt != othEnd; ++othIt) {
|
|
902 printf("\t%s", othIt->c_str());
|
|
903 }
|
|
904 printf("\n");
|
|
905 }
|
|
906 }
|
|
907 // VCF
|
|
908 else if (_isGff == false && _isVcf == true) {
|
|
909 printf ("%s\t%d\t", bed.chrom.c_str(), start+1);
|
|
910
|
|
911 vector<string>::const_iterator othIt = bed.otherFields.begin();
|
|
912 vector<string>::const_iterator othEnd = bed.otherFields.end();
|
|
913 for ( ; othIt != othEnd; ++othIt) {
|
|
914 printf("%s\t", othIt->c_str());
|
|
915 }
|
|
916 printf("\n");
|
|
917 }
|
|
918 // GFF
|
|
919 else if (_isGff == true) {
|
|
920 // "GFF-8"
|
|
921 if (this->bedType == 8) {
|
|
922 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
|
|
923 bed.name.c_str(), start+1, end,
|
|
924 bed.score.c_str(), bed.strand.c_str(),
|
|
925 bed.otherFields[1].c_str());
|
|
926 }
|
|
927 // "GFF-9"
|
|
928 else if (this->bedType == 9) {
|
|
929 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
|
|
930 bed.name.c_str(), start+1, end,
|
|
931 bed.score.c_str(), bed.strand.c_str(),
|
|
932 bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
|
|
933 }
|
|
934 }
|
|
935 }
|
|
936
|
|
937
|
|
938
|
|
939 /*
|
|
940 reportBedRangeNewLine
|
|
941
|
|
942 Writes a custom start->end for a BED entry
|
|
943 with a NEWLINE at the end of the line.
|
|
944
|
|
945 Works for BED3 - BED6.
|
|
946 */
|
|
947 template <typename T>
|
|
948 inline void reportBedRangeTab(const T &bed, CHRPOS start, CHRPOS end) {
|
|
949
|
|
950 // if it is azeroLength feature, we need to
|
|
951 // correct the start and end coords to what they were
|
|
952 // in the original file
|
|
953 if (bed.zeroLength) {
|
|
954 start = bed.start + 1;
|
|
955 end = bed.end - 1;
|
|
956 }
|
|
957 // BED
|
|
958 if (_isGff == false && _isVcf == false) {
|
|
959 if (this->bedType == 3) {
|
|
960 printf ("%s\t%d\t%d\t", bed.chrom.c_str(), start, end);
|
|
961 }
|
|
962 else if (this->bedType == 4) {
|
|
963 printf ("%s\t%d\t%d\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str());
|
|
964 }
|
|
965 else if (this->bedType == 5) {
|
|
966 printf ("%s\t%d\t%d\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
|
|
967 bed.score.c_str());
|
|
968 }
|
|
969 else if (this->bedType == 6) {
|
|
970 printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
|
|
971 bed.score.c_str(), bed.strand.c_str());
|
|
972 }
|
|
973 else if (this->bedType > 6) {
|
|
974 printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
|
|
975 bed.score.c_str(), bed.strand.c_str());
|
|
976
|
|
977 vector<string>::const_iterator othIt = bed.otherFields.begin();
|
|
978 vector<string>::const_iterator othEnd = bed.otherFields.end();
|
|
979 for ( ; othIt != othEnd; ++othIt) {
|
|
980 printf("%s\t", othIt->c_str());
|
|
981 }
|
|
982 }
|
|
983 }
|
|
984 // VCF
|
|
985 else if (_isGff == false && _isVcf == true) {
|
|
986 printf ("%s\t%d\t", bed.chrom.c_str(), bed.start+1);
|
|
987
|
|
988 vector<string>::const_iterator othIt = bed.otherFields.begin();
|
|
989 vector<string>::const_iterator othEnd = bed.otherFields.end();
|
|
990 for ( ; othIt != othEnd; ++othIt) {
|
|
991 printf("%s\t", othIt->c_str());
|
|
992 }
|
|
993 }
|
|
994 // GFF
|
|
995 else if (_isGff == true) {
|
|
996 // "GFF-8"
|
|
997 if (this->bedType == 8) {
|
|
998 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
|
|
999 bed.name.c_str(), start+1, end,
|
|
1000 bed.score.c_str(), bed.strand.c_str(),
|
|
1001 bed.otherFields[1].c_str());
|
|
1002 }
|
|
1003 // "GFF-9"
|
|
1004 else if (this->bedType == 9) {
|
|
1005 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
|
|
1006 bed.name.c_str(), start+1, end,
|
|
1007 bed.score.c_str(), bed.strand.c_str(),
|
|
1008 bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
|
|
1009 }
|
|
1010 }
|
|
1011 }
|
|
1012
|
|
1013
|
|
1014
|
|
1015 /*
|
|
1016 reportBedRangeTab
|
|
1017
|
|
1018 Writes a custom start->end for a BED entry
|
|
1019 with a TAB at the end of the line.
|
|
1020
|
|
1021 Works for BED3 - BED6.
|
|
1022 */
|
|
1023 template <typename T>
|
|
1024 inline void reportBedRangeNewLine(const T &bed, CHRPOS start, CHRPOS end) {
|
|
1025
|
|
1026 // if it is azeroLength feature, we need to
|
|
1027 // correct the start and end coords to what they were
|
|
1028 // in the original file
|
|
1029 if (bed.zeroLength) {
|
|
1030 start = bed.start + 1;
|
|
1031 end = bed.end - 1;
|
|
1032 }
|
|
1033 // BED
|
|
1034 if (_isGff == false && _isVcf == false) {
|
|
1035 if (this->bedType == 3) {
|
|
1036 printf ("%s\t%d\t%d\n", bed.chrom.c_str(), start, end);
|
|
1037 }
|
|
1038 else if (this->bedType == 4) {
|
|
1039 printf ("%s\t%d\t%d\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str());
|
|
1040 }
|
|
1041 else if (this->bedType == 5) {
|
|
1042 printf ("%s\t%d\t%d\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(),
|
|
1043 bed.score.c_str());
|
|
1044 }
|
|
1045 else if (this->bedType == 6) {
|
|
1046 printf ("%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(),
|
|
1047 bed.score.c_str(), bed.strand.c_str());
|
|
1048 }
|
|
1049 else if (this->bedType > 6) {
|
|
1050 printf ("%s\t%d\t%d\t%s\t%s\t%s", bed.chrom.c_str(), start, end, bed.name.c_str(),
|
|
1051 bed.score.c_str(), bed.strand.c_str());
|
|
1052
|
|
1053 vector<string>::const_iterator othIt = bed.otherFields.begin();
|
|
1054 vector<string>::const_iterator othEnd = bed.otherFields.end();
|
|
1055 for ( ; othIt != othEnd; ++othIt) {
|
|
1056 printf("\t%s", othIt->c_str());
|
|
1057 }
|
|
1058 printf("\n");
|
|
1059 }
|
|
1060 }
|
|
1061 // VCF
|
|
1062 else if (_isGff == false && _isVcf == true) {
|
|
1063 printf ("%s\t%d\t", bed.chrom.c_str(), bed.start+1);
|
|
1064
|
|
1065 vector<string>::const_iterator othIt = bed.otherFields.begin();
|
|
1066 vector<string>::const_iterator othEnd = bed.otherFields.end();
|
|
1067 for ( ; othIt != othEnd; ++othIt) {
|
|
1068 printf("%s\t", othIt->c_str());
|
|
1069 }
|
|
1070 printf("\n");
|
|
1071 }
|
|
1072 // GFF
|
|
1073 else if (_isGff == true) {
|
|
1074 // "GFF-9"
|
|
1075 if (this->bedType == 8) {
|
|
1076 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
|
|
1077 bed.name.c_str(), start+1, end,
|
|
1078 bed.score.c_str(), bed.strand.c_str(),
|
|
1079 bed.otherFields[1].c_str());
|
|
1080 }
|
|
1081 // "GFF-8"
|
|
1082 else if (this->bedType == 9) {
|
|
1083 printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
|
|
1084 bed.name.c_str(), start+1, end,
|
|
1085 bed.score.c_str(), bed.strand.c_str(),
|
|
1086 bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
|
|
1087 }
|
|
1088 }
|
|
1089 }
|
|
1090
|
|
1091
|
|
1092 /*
|
|
1093 reportNullBedTab
|
|
1094 */
|
|
1095 void reportNullBedTab() {
|
|
1096
|
|
1097 if (_isGff == false && _isVcf == false) {
|
|
1098 if (this->bedType == 3) {
|
|
1099 printf (".\t-1\t-1\t");
|
|
1100 }
|
|
1101 else if (this->bedType == 4) {
|
|
1102 printf (".\t-1\t-1\t.\t");
|
|
1103 }
|
|
1104 else if (this->bedType == 5) {
|
|
1105 printf (".\t-1\t-1\t.\t-1\t");
|
|
1106 }
|
|
1107 else if (this->bedType == 6) {
|
|
1108 printf (".\t-1\t-1\t.\t-1\t.\t");
|
|
1109 }
|
|
1110 else if (this->bedType > 6) {
|
|
1111 printf (".\t-1\t-1\t.\t-1\t.\t");
|
|
1112 for (unsigned int i = 6; i < this->bedType; ++i) {
|
|
1113 printf(".\t");
|
|
1114 }
|
|
1115 }
|
|
1116 }
|
|
1117 else if (_isGff == true && _isVcf == false) {
|
|
1118 if (this->bedType == 8) {
|
|
1119 printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t");
|
|
1120 }
|
|
1121 else if (this->bedType == 9) {
|
|
1122 printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t.\t");
|
|
1123 }
|
|
1124 }
|
|
1125 }
|
|
1126
|
|
1127
|
|
1128 /*
|
|
1129 reportNullBedTab
|
|
1130 */
|
|
1131 void reportNullBedNewLine() {
|
|
1132
|
|
1133 if (_isGff == false && _isVcf == false) {
|
|
1134 if (this->bedType == 3) {
|
|
1135 printf (".\t-1\t-1\n");
|
|
1136 }
|
|
1137 else if (this->bedType == 4) {
|
|
1138 printf (".\t-1\t-1\t.\n");
|
|
1139 }
|
|
1140 else if (this->bedType == 5) {
|
|
1141 printf (".\t-1\t-1\t.\t-1\n");
|
|
1142 }
|
|
1143 else if (this->bedType == 6) {
|
|
1144 printf (".\t-1\t-1\t.\t-1\t.\n");
|
|
1145 }
|
|
1146 else if (this->bedType > 6) {
|
|
1147 printf (".\t-1\t-1\t.\t-1\t.");
|
|
1148 for (unsigned int i = 6; i < this->bedType; ++i) {
|
|
1149 printf("\t.");
|
|
1150 }
|
|
1151 printf("\n");
|
|
1152 }
|
|
1153 }
|
|
1154 else if (_isGff == true && _isVcf == false) {
|
|
1155 if (this->bedType == 8) {
|
|
1156 printf (".\t.\t.\t-1\t-1\t-1\t.\t.\n");
|
|
1157 }
|
|
1158 else if (this->bedType == 9) {
|
|
1159 printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t.\n");
|
|
1160 }
|
|
1161 }
|
|
1162 }
|
|
1163
|
|
1164
|
|
1165 };
|
|
1166
|
|
1167 #endif /* BEDFILE_H */
|