diff BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h @ 1:bec36315bd12 default tip

Deleted selected files
author aaronquinlan
date Sat, 19 Nov 2011 14:17:03 -0500
parents dfcd8b6c1bda
children
line wrap: on
line diff
--- a/BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h	Thu Nov 03 10:25:04 2011 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1167 +0,0 @@
-/*****************************************************************************
-  bedFile.h
-
-  (c) 2009 - Aaron Quinlan
-  Hall Laboratory
-  Department of Biochemistry and Molecular Genetics
-  University of Virginia
-  aaronquinlan@gmail.com
-
-  Licensed under the GNU General Public License 2.0 license.
-******************************************************************************/
-#ifndef BEDFILE_H
-#define BEDFILE_H
-
-// "local" includes
-#include "gzstream.h"
-#include "lineFileUtilities.h"
-#include "fileType.h"
-
-// standard includes
-#include <vector>
-#include <map>
-#include <set>
-#include <string>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <cstring>
-#include <algorithm>
-#include <limits.h>
-#include <stdint.h>
-#include <cstdio>
-//#include <tr1/unordered_map>  // Experimental.
-using namespace std;
-
-
-//*************************************************
-// Data type tydedef
-//*************************************************
-typedef uint32_t CHRPOS;
-typedef uint16_t BINLEVEL;
-typedef uint32_t BIN;
-typedef uint16_t USHORT;
-typedef uint32_t UINT;
-
-//*************************************************
-// Genome binning constants
-//*************************************************
-
-const BIN      _numBins   = 37450;
-const BINLEVEL _binLevels = 7;
-
-// bins range in size from 16kb to 512Mb
-// Bin  0          spans 512Mbp,   # Level 1
-// Bins 1-8        span 64Mbp,     # Level 2
-// Bins 9-72       span 8Mbp,      # Level 3
-// Bins 73-584     span 1Mbp       # Level 4
-// Bins 585-4680   span 128Kbp     # Level 5
-// Bins 4681-37449 span 16Kbp      # Level 6
-const BIN _binOffsetsExtended[] = {32678+4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};
-//const BIN _binOffsetsExtended[] = {4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0};
-
-const USHORT _binFirstShift = 14;       /* How much to shift to get to finest bin. */
-const USHORT _binNextShift  = 3;        /* How much to shift to get to next larger bin. */
-
-
-//*************************************************
-// Common data structures
-//*************************************************
-
-struct DEPTH {
-    UINT starts;
-    UINT ends;
-};
-
-
-/*
-    Structure for regular BED records
-*/
-struct BED {
-
-    // Regular BED fields
-    string chrom;
-    CHRPOS start;
-    CHRPOS end;
-    string name;
-    string score;
-    string strand;
-
-    // Add'l fields for BED12 and/or custom BED annotations
-    vector<string> otherFields;
-
-    // experimental fields for the FJOIN approach.
-    bool   zeroLength;
-    bool   added;
-    bool   finished;
-    // list of hits from another file.
-    vector<BED> overlaps;
-
-public:
-    // constructors
-
-    // Null
-    BED()
-    : chrom(""),
-      start(0),
-      end(0),
-      name(""),
-      score(""),
-      strand(""),
-      otherFields(),
-      zeroLength(false),
-      added(false),
-      finished(false),
-      overlaps()
-    {}
-
-    // BED3
-    BED(string chrom, CHRPOS start, CHRPOS end)
-    : chrom(chrom),
-      start(start),
-      end(end),
-      name(""),
-      score(""),
-      strand(""),
-      otherFields(),
-      zeroLength(false),
-      added(false),
-      finished(false),
-      overlaps()
-    {}
-
-    // BED4
-    BED(string chrom, CHRPOS start, CHRPOS end, string strand)
-    : chrom(chrom),
-      start(start),
-      end(end),
-      name(""),
-      score(""),
-      strand(strand),
-      otherFields(),
-      zeroLength(false),
-      added(false),
-      finished(false),
-      overlaps()
-    {}
-
-    // BED6
-    BED(string chrom, CHRPOS start, CHRPOS end, string name,
-        string score, string strand)
-    : chrom(chrom),
-      start(start),
-      end(end),
-      name(name),
-      score(score),
-      strand(strand),
-      otherFields(),
-      zeroLength(false),
-      added(false),
-      finished(false),
-      overlaps()
-    {}
-
-    // BEDALL
-    BED(string chrom, CHRPOS start, CHRPOS end, string name,
-        string score, string strand, vector<string> otherFields)
-    : chrom(chrom),
-      start(start),
-      end(end),
-      name(name),
-      score(score),
-      strand(strand),
-      otherFields(otherFields),
-      zeroLength(false),
-      added(false),
-      finished(false),
-      overlaps()
-    {}
-    
-    int size() {
-        return end-start;
-    }
-
-}; // BED
-
-
-/*
-    Structure for each end of a paired BED record
-    mate points to the other end.
-*/
-struct MATE {
-    BED bed;
-    int lineNum;
-    MATE *mate;
-};
-
-
-/*
-    Structure for regular BED COVERAGE records
-*/
-struct BEDCOV {
-
-    string chrom;
-    
-    // Regular BED fields
-    CHRPOS start;
-    CHRPOS end;
-    string name;
-    string score;
-    string strand;
-
-    // Add'l fields for BED12 and/or custom BED annotations
-    vector<string> otherFields;
-
-    // flag a zero-length feature
-    bool   zeroLength;
-    
-    // Additional fields specific to computing coverage
-    map<unsigned int, DEPTH> depthMap;
-    unsigned int count;
-    CHRPOS minOverlapStart;
-    
-    
-    public:
-    // constructors
-    // Null
-    BEDCOV()
-    : chrom(""),
-      start(0),
-      end(0),
-      name(""),
-      score(""),
-      strand(""),
-      otherFields(),
-      zeroLength(false),
-      depthMap(),
-      count(0),
-      minOverlapStart(0)
-    {}
-};
-
-
-/*
-    Structure for BED COVERAGE records having lists of
-    multiple coverages
-*/
-struct BEDCOVLIST {
-
-    // Regular BED fields
-    string chrom;
-    CHRPOS start;
-    CHRPOS end;
-    string name;
-    string score;
-    string strand;
-
-    // Add'l fields for BED12 and/or custom BED annotations
-    vector<string> otherFields;
-    
-    // flag a zero-length feature
-    bool   zeroLength;
-
-    // Additional fields specific to computing coverage
-    vector< map<unsigned int, DEPTH> > depthMapList;
-    vector<unsigned int> counts;
-    vector<CHRPOS> minOverlapStarts;
-    
-    
-    public:
-    // constructors
-    // Null
-    BEDCOVLIST()
-    : chrom(""),
-      start(0),
-      end(0),
-      name(""),
-      score(""),
-      strand(""),
-      otherFields(),
-      zeroLength(false),
-      depthMapList(),
-      counts(0),
-      minOverlapStarts(0)
-    {}
-};
-
-
-// enum to flag the state of a given line in a BED file.
-enum BedLineStatus
-{
-    BED_INVALID = -1,
-    BED_HEADER  = 0,
-    BED_BLANK   = 1,
-    BED_VALID   = 2
-};
-
-// enum to indicate the type of file we are dealing with
-enum FileType
-{
-    BED_FILETYPE,
-    GFF_FILETYPE,
-    VCF_FILETYPE
-};
-
-//*************************************************
-// Data structure typedefs
-//*************************************************
-typedef vector<BED>    bedVector;
-typedef vector<BEDCOV> bedCovVector;
-typedef vector<MATE> mateVector;
-typedef vector<BEDCOVLIST> bedCovListVector;
-
-typedef map<BIN, bedVector,    std::less<BIN> > binsToBeds;
-typedef map<BIN, bedCovVector, std::less<BIN> > binsToBedCovs;
-typedef map<BIN, mateVector, std::less<BIN> > binsToMates;
-typedef map<BIN, bedCovListVector, std::less<BIN> > binsToBedCovLists;
-
-typedef map<string, binsToBeds, std::less<string> >    masterBedMap;
-typedef map<string, binsToBedCovs, std::less<string> > masterBedCovMap;
-typedef map<string, binsToMates, std::less<string> > masterMateMap;
-typedef map<string, binsToBedCovLists, std::less<string> > masterBedCovListMap;
-typedef map<string, bedVector, std::less<string> >     masterBedMapNoBin;
-
-
-// EXPERIMENTAL - wait for TR1
-// typedef vector<BED>    bedVector;
-// typedef vector<BEDCOV> bedCovVector;
-//
-// typedef tr1::unordered_map<BIN, bedVector> binsToBeds;
-// typedef tr1::unordered_map<BIN, bedCovVector> binsToBedCovs;
-//
-// typedef tr1::unordered_map<string, binsToBeds>    masterBedMap;
-// typedef tr1::unordered_map<string, binsToBedCovs> masterBedCovMap;
-// typedef tr1::unordered_map<string, bedVector>     masterBedMapNoBin;
-
-
-
-// return the genome "bin" for a feature with this start and end
-inline
-BIN getBin(CHRPOS start, CHRPOS end) {
-    --end;
-    start >>= _binFirstShift;
-    end   >>= _binFirstShift;
-
-    for (register short i = 0; i < _binLevels; ++i) {
-        if (start == end) return _binOffsetsExtended[i] + start;
-        start >>= _binNextShift;
-        end   >>= _binNextShift;
-    }
-    cerr << "start " << start << ", end " << end << " out of range in findBin (max is 512M)" << endl;
-    return 0;
-}
-
-/****************************************************
-// isInteger(s): Tests if string s is a valid integer
-*****************************************************/
-inline bool isInteger(const std::string& s) {
-    int len = s.length();
-    for (int i = 0; i < len; i++) {
-        if (!std::isdigit(s[i])) return false;
-    }
-    return true;
-}
-
-
-// return the amount of overlap between two features.  Negative if none and the the
-// number of negative bases is the distance between the two.
-inline
-int overlaps(CHRPOS aS, CHRPOS aE, CHRPOS bS, CHRPOS bE) {
-    return min(aE, bE) - max(aS, bS);
-}
-
-// is A after (to the right of) B?
-inline
-bool after(const BED &a, const BED &b) {
-    return (a.start >= b.end);
-}
-
-
-// Ancillary functions
-void splitBedIntoBlocks(const BED &bed, int lineNum, bedVector &bedBlocks);
-
-
-// BED Sorting Methods
-bool sortByChrom(const BED &a, const BED &b);
-bool sortByStart(const BED &a, const BED &b);
-bool sortBySizeAsc(const BED &a, const BED &b);
-bool sortBySizeDesc(const BED &a, const BED &b);
-bool sortByScoreAsc(const BED &a, const BED &b);
-bool sortByScoreDesc(const BED &a, const BED &b);
-bool byChromThenStart(BED const &a, BED const &b);
-
-
-
-//************************************************
-// BedFile Class methods and elements
-//************************************************
-class BedFile {
-
-public:
-
-    // Constructor
-    BedFile(string &);
-
-    // Destructor
-    ~BedFile(void);
-
-    // Open a BED file for reading (creates an istream pointer)
-    void Open(void);
-    
-    // Rewind the pointer back to the beginning of the file
-    void Rewind(void);
-
-    // Jump to a specific byte in the file
-    void Seek(unsigned long offset);
-    
-    bool Empty();
-
-    // Close an opened BED file.
-    void Close(void);
-
-    // Get the next BED entry in an opened BED file.
-    BedLineStatus GetNextBed (BED &bed, int &lineNum, bool forceSorted = false);
-    
-    // Returns the next MERGED (i.e., non-overlapping) interval in an opened BED file
-    // NOTE: assumes input file is sorted by chrom then start
-    bool GetNextMergedBed(BED &merged_bed, int &lineNum);
-
-    // load a BED file into a map keyed by chrom, then bin. value is vector of BEDs
-    void loadBedFileIntoMap();
-
-    // load a BED file into a map keyed by chrom, then bin. value is vector of BEDCOVs
-    void loadBedCovFileIntoMap();
-
-    // load a BED file into a map keyed by chrom, then bin. value is vector of BEDCOVLISTs
-    void loadBedCovListFileIntoMap();
-
-    // load a BED file into a map keyed by chrom. value is vector of BEDs
-    void loadBedFileIntoMapNoBin();
-
-    // Given a chrom, start, end and strand for a single feature,
-    // search for all overlapping features in another BED file.
-    // Searches through each relevant genome bin on the same chromosome
-    // as the single feature. Note: Adapted from kent source "binKeeperFind"
-    void FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, vector<BED> &hits, bool sameStrand, bool diffStrand);
-
-    // return true if at least one overlap was found.  otherwise, return false.
-    bool FindOneOrMoreOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand,
-                                        bool sameStrand, bool diffStrand, float overlapFraction = 0.0);
-
-    // return true if at least one __reciprocal__ overlap was found.  otherwise, return false.
-    bool FindOneOrMoreReciprocalOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand,
-                                                    bool sameStrand, bool diffStrand, float overlapFraction = 0.0);
-
-    // Given a chrom, start, end and strand for a single feature,
-    // increment a the number of hits for each feature in B file
-    // that the feature overlaps
-    void countHits(const BED &a, bool sameStrand = false, bool diffStrand = false, bool countsOnly = false);
-
-    // same as above, but has special logic that processes a set of
-    // BED "blocks" from a single entry so as to avoid over-counting
-    // each "block" of a single BAM/BED12 as distinct coverage.  That is,
-    // if one read has four block, we only want to count the coverage as
-    // coming from one read, not four.
-    void countSplitHits(const vector<BED> &bedBlock, bool sameStrand = false, bool diffStrand = false, bool countsOnly = false);
-
-    // Given a chrom, start, end and strand for a single feature,
-    // increment a the number of hits for each feature in B file
-    // that the feature overlaps
-    void countListHits(const BED &a, int index, bool sameStrand, bool diffStrand);
-
-    // the bedfile with which this instance is associated
-    string bedFile;
-    unsigned int bedType;  // 3-6, 12 for BED
-                           // 9 for GFF
-    bool isZeroBased;
-
-    // Main data structires used by BEDTools
-    masterBedCovMap      bedCovMap;
-    masterBedCovListMap  bedCovListMap;
-    masterBedMap         bedMap;
-    masterBedMapNoBin    bedMapNoBin;
-
-private:
-
-    // data
-    bool _isGff;
-    bool _isVcf;
-    bool _typeIsKnown;        // do we know the type?   (i.e., BED, GFF, VCF)
-    FileType   _fileType;     // what is the file type? (BED? GFF? VCF?)
-    istream   *_bedStream;
-    string _bedLine;
-    vector<string> _bedFields;
-    int _merged_start;
-    int _merged_end;
-    string _merged_chrom;
-    int _prev_start;
-    string _prev_chrom;
-
-    void setZeroBased(bool zeroBased);
-    void setGff (bool isGff);
-    void setVcf (bool isVcf);
-    void setFileType (FileType type);
-    void setBedType (int colNums);
-
-    /******************************************************
-    Private definitions to circumvent linker issues with
-    templated member functions.
-    *******************************************************/
-
-    /*
-        parseLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.)
-    */
-    template <typename T>
-    inline BedLineStatus parseLine (T &bed, const vector<string> &lineVector, int &lineNum) {
-
-        //char *p2End, *p3End, *p4End, *p5End;
-        //long l2, l3, l4, l5;
-        unsigned int numFields = lineVector.size();
-
-        // bail out if we have a blank line
-        if (numFields == 0) {
-            return BED_BLANK;
-        }
-
-        if ((lineVector[0].find("track") == string::npos) && (lineVector[0].find("browser") == string::npos) && (lineVector[0].find("#") == string::npos) ) {
-
-            if (numFields >= 3) {
-                // line parsing for all lines after the first non-header line
-                if (_typeIsKnown == true) {
-                    switch(_fileType) {
-                        case BED_FILETYPE:
-                            if (parseBedLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
-                        case VCF_FILETYPE:
-                            if (parseVcfLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
-                        case GFF_FILETYPE:
-                            if (parseGffLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
-                        default:
-                            printf("ERROR: file type encountered. Exiting\n");
-                            exit(1);
-                    }
-                }
-                // line parsing for first non-header line: figure out file contents
-                else {
-                    // it's BED format if columns 2 and 3 are integers
-                    if (isInteger(lineVector[1]) && isInteger(lineVector[2])) {
-                        setGff(false);
-                        setZeroBased(true);
-                        setFileType(BED_FILETYPE);
-                        setBedType(numFields);       // we now expect numFields columns in each line
-                        if (parseBedLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
-                    }
-                    // it's VCF, assuming the second column is numeric and there are at least 8 fields.
-                    else if (isInteger(lineVector[1]) && numFields >= 8) {
-                        setGff(false);
-                        setVcf(true);
-                        setZeroBased(false);
-                        setFileType(VCF_FILETYPE);
-                        setBedType(numFields);       // we now expect numFields columns in each line
-                        if (parseVcfLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
-                    }
-                    // it's GFF, assuming columns columns 4 and 5 are numeric and we have 9 fields total.
-                    else if ((numFields >= 8) && isInteger(lineVector[3]) && isInteger(lineVector[4])) {
-                        setGff(true);
-                        setZeroBased(false);
-                        setFileType(GFF_FILETYPE);
-                        setBedType(numFields);       // we now expect numFields columns in each line
-                        if (parseGffLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID;
-                    }
-                    else {
-                        cerr << "Unexpected file format.  Please use tab-delimited BED, GFF, or VCF. " <<
-                                "Perhaps you have non-integer starts or ends at line " << lineNum << "?" << endl;
-                        exit(1);
-                    }
-                }
-            }
-            else {
-                cerr << "It looks as though you have less than 3 columns at line: " << lineNum << ".  Are you sure your files are tab-delimited?" << endl;
-                exit(1);
-            }
-        }
-        else {
-            lineNum--;
-            return BED_HEADER;
-        }
-        // default
-        return BED_INVALID;
-    }
-
-
-    /*
-        parseBedLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.)
-    */
-    template <typename T>
-    inline bool parseBedLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) {
-
-        // process as long as the number of fields in this
-        // line matches what we expect for this file.
-        if (numFields == this->bedType) {
-            bed.chrom = lineVector[0];
-            int i;
-            i = atoi(lineVector[1].c_str());
-            if (i<0) {
-                 cerr << "Error: malformed BED entry at line " << lineNum << ". Start Coordinate detected that is < 0. Exiting." << endl;
-                 exit(1);
-            }
-            bed.start = (CHRPOS)i;
-            i = atoi(lineVector[2].c_str());
-            if (i<0) {
-                cerr << "Error: malformed BED entry at line " << lineNum << ". End Coordinate detected that is < 0. Exiting." << endl;
-                exit(1);
-            }
-            bed.end = (CHRPOS)i;
-            
-            // handle starts == end (e.g., insertions in reference genome)
-            if (bed.start == bed.end) {
-                bed.start--;
-                bed.end++;
-                bed.zeroLength = true;
-            }
-            
-            if (this->bedType == 4) {
-                bed.name = lineVector[3];
-            }
-            else if (this->bedType == 5) {
-                bed.name = lineVector[3];
-                bed.score = lineVector[4];
-            }
-            else if (this->bedType == 6) {
-                bed.name = lineVector[3];
-                bed.score = lineVector[4];
-                bed.strand = lineVector[5];
-            }
-            else if (this->bedType > 6) {
-                bed.name = lineVector[3];
-                bed.score = lineVector[4];
-                bed.strand = lineVector[5];
-                for (unsigned int i = 6; i < lineVector.size(); ++i) {
-                    bed.otherFields.push_back(lineVector[i]);
-                }
-            }
-            else if (this->bedType != 3) {
-                cerr << "Error: unexpected number of fields at line: " << lineNum
-                     << ".  Verify that your files are TAB-delimited.  Exiting..." << endl;
-                exit(1);
-            }
-
-            // sanity checks.
-            if (bed.start <= bed.end) {
-                return true;
-            }
-            else {
-                cerr << "Error: malformed BED entry at line " << lineNum << ". Start was greater than end. Exiting." << endl;
-                exit(1);
-            }
-        }
-        else if (numFields == 1) {
-            cerr << "Only one BED field detected: " << lineNum << ".  Verify that your files are TAB-delimited.  Exiting..." << endl;
-            exit(1);
-        }
-        else if ((numFields != this->bedType) && (numFields != 0)) {
-            cerr << "Differing number of BED fields encountered at line: " << lineNum << ".  Exiting..." << endl;
-            exit(1);
-        }
-        else if ((numFields < 3) && (numFields != 0)) {
-            cerr << "TAB delimited BED file with at least 3 fields (chrom, start, end) is required at line: "<< lineNum << ".  Exiting..." << endl;
-            exit(1);
-        }
-        return false;
-    }
-
-
-    /*
-        parseVcfLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.)
-    */
-    template <typename T>
-    inline bool parseVcfLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) {
-        if (numFields == this->bedType) {
-            bed.chrom  = lineVector[0];
-            bed.start  = atoi(lineVector[1].c_str()) - 1;  // VCF is one-based
-            bed.end    = bed.start + lineVector[3].size(); // VCF 4.0 stores the size of the affected REF allele.
-            bed.strand = "+";
-            // construct the name from the ref and alt alleles.
-            // if it's an annotated variant, add the rsId as well.
-            bed.name   = lineVector[3] + "/" + lineVector[4];
-            if (lineVector[2] != ".") {
-                bed.name += "_" + lineVector[2];
-            }
-
-            if (this->bedType > 2) {
-                for (unsigned int i = 2; i < numFields; ++i)
-                    bed.otherFields.push_back(lineVector[i]);
-            }
-
-            if ((bed.start <= bed.end) && (bed.start >= 0) && (bed.end >= 0)) {
-                return true;
-            }
-            else if (bed.start > bed.end) {
-                cerr << "Error: malformed VCF entry at line " << lineNum << ". Start was greater than end. Exiting." << endl;
-                exit(1);
-            }
-            else if ( (bed.start < 0) || (bed.end < 0) ) {
-                cerr << "Error: malformed VCF entry at line " << lineNum << ". Coordinate detected that is < 0. Exiting." << endl;
-                exit(1);
-            }
-        }
-        else if (numFields == 1) {
-            cerr << "Only one VCF field detected: " << lineNum << ".  Verify that your files are TAB-delimited.  Exiting..." << endl;
-            exit(1);
-        }
-        else if ((numFields != this->bedType) && (numFields != 0)) {
-            cerr << "Differing number of VCF fields encountered at line: " << lineNum << ".  Exiting..." << endl;
-            exit(1);
-        }
-        else if ((numFields < 2) && (numFields != 0)) {
-            cerr << "TAB delimited VCF file with at least 2 fields (chrom, pos) is required at line: "<< lineNum << ".  Exiting..." << endl;
-            exit(1);
-        }
-        return false;
-    }
-
-
-
-    /*
-        parseGffLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.)
-    */
-    template <typename T>
-    inline bool parseGffLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) {
-        if (numFields == this->bedType) {
-            if (this->bedType >= 8 && _isGff) {
-                bed.chrom = lineVector[0];
-                if (isInteger(lineVector[3]))
-                    bed.start  = atoi(lineVector[3].c_str());
-                if (isInteger(lineVector[4]))
-                    bed.end  = atoi(lineVector[4].c_str());
-                bed.name   = lineVector[2];
-                bed.score  = lineVector[5];
-                bed.strand = lineVector[6].c_str();
-                bed.otherFields.push_back(lineVector[1]);  // add GFF "source". unused in BED
-                bed.otherFields.push_back(lineVector[7]);  // add GFF "fname". unused in BED
-                // handle the optional 9th field.
-                if (this->bedType == 9)
-                    bed.otherFields.push_back(lineVector[8]);  // add GFF "group". unused in BED
-                bed.start--;
-            }
-            else {
-                cerr << "Error: unexpected number of fields at line: " << lineNum <<
-                        ".  Verify that your files are TAB-delimited and that your GFF file has 8 or 9 fields.  Exiting..." << endl;
-                exit(1);
-            }
-            if (bed.start > bed.end) {
-                cerr << "Error: malformed GFF entry at line " << lineNum << ". Start was greater than end. Exiting." << endl;
-                exit(1);
-            }
-            if ( (bed.start < 0) || (bed.end < 0) ) {
-                cerr << "Error: malformed GFF entry at line " << lineNum << ". Coordinate detected that is < 1. Exiting." << endl;
-                exit(1);
-            }
-            return true;
-        }
-        else if (numFields == 1) {
-            cerr << "Only one GFF field detected: " << lineNum << ".  Verify that your files are TAB-delimited.  Exiting..." << endl;
-            exit(1);
-        }
-        else if ((numFields != this->bedType) && (numFields != 0)) {
-            cerr << "Differing number of GFF fields encountered at line: " << lineNum << ".  Exiting..." << endl;
-            exit(1);
-        }
-        else if ((numFields < 8) && (numFields != 0)) {
-            cerr << "TAB delimited GFF file with 8 or 9 fields is required at line: "<< lineNum << ".  Exiting..." << endl;
-            exit(1);
-        }
-        return false;
-    }
-
-
-public:
-
-    /*
-        reportBedTab
-
-        Writes the _original_ BED entry with a TAB
-        at the end of the line.
-        Works for BED3 - BED6.
-    */
-    template <typename T>
-    inline void reportBedTab(const T &bed) {
-        
-        // if it is azeroLength feature, we need to
-        // correct the start and end coords to what they were
-        // in the original file
-        CHRPOS start = bed.start;
-        CHRPOS end   = bed.end;
-        if (bed.zeroLength) {
-            if (_isGff == false)
-                start++;
-            end--;
-        }
-        
-        // BED
-        if (_isGff == false && _isVcf == false) {
-            if (this->bedType == 3) {
-                printf ("%s\t%d\t%d\t", bed.chrom.c_str(), start, end);
-            }
-            else if (this->bedType == 4) {
-                printf ("%s\t%d\t%d\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str());
-            }
-            else if (this->bedType == 5) {
-                printf ("%s\t%d\t%d\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
-                                                bed.score.c_str());
-            }
-            else if (this->bedType == 6) {
-                printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
-                                                    bed.score.c_str(), bed.strand.c_str());
-            }
-            else if (this->bedType > 6) {
-                printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
-                                                    bed.score.c_str(), bed.strand.c_str());
-
-                vector<string>::const_iterator othIt = bed.otherFields.begin();
-                vector<string>::const_iterator othEnd = bed.otherFields.end();
-                for ( ; othIt != othEnd; ++othIt) {
-                    printf("%s\t", othIt->c_str());
-                }
-            }
-        }
-        // VCF
-        else if (_isGff == false && _isVcf == true) {
-            printf ("%s\t%d\t", bed.chrom.c_str(), start+1);
-
-            vector<string>::const_iterator othIt = bed.otherFields.begin();
-            vector<string>::const_iterator othEnd = bed.otherFields.end();
-            for ( ; othIt != othEnd; ++othIt) {
-                printf("%s\t", othIt->c_str());
-            }
-        }
-        // GFF
-        else if (_isGff == true) {
-            // "GFF-8"
-            if (this->bedType == 8) {
-                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
-                                                                 bed.name.c_str(), start+1, end,
-                                                                 bed.score.c_str(), bed.strand.c_str(),
-                                                                 bed.otherFields[1].c_str());
-            }
-            // "GFF-9"
-            else if (this->bedType == 9) {
-                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
-                                                                 bed.name.c_str(), start+1, end,
-                                                                 bed.score.c_str(), bed.strand.c_str(),
-                                                                 bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
-            }
-        }
-    }
-
-
-
-    /*
-        reportBedNewLine
-
-        Writes the _original_ BED entry with a NEWLINE
-        at the end of the line.
-        Works for BED3 - BED6.
-    */
-    template <typename T>
-    inline void reportBedNewLine(const T &bed) {
-        
-        // if it is azeroLength feature, we need to
-        // correct the start and end coords to what they were
-        // in the original file
-        CHRPOS start = bed.start;
-        CHRPOS end   = bed.end;
-        if (bed.zeroLength) {
-            if (_isGff == false)
-                start++;
-            end--;
-        }
-        
-        //BED
-        if (_isGff == false && _isVcf == false) {
-            if (this->bedType == 3) {
-                printf ("%s\t%d\t%d\n", bed.chrom.c_str(), start, end);
-            }
-            else if (this->bedType == 4) {
-                printf ("%s\t%d\t%d\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str());
-            }
-            else if (this->bedType == 5) {
-                printf ("%s\t%d\t%d\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(),
-                                                bed.score.c_str());
-            }
-            else if (this->bedType == 6) {
-                printf ("%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(),
-                                                    bed.score.c_str(), bed.strand.c_str());
-            }
-            else if (this->bedType > 6) {
-                printf ("%s\t%d\t%d\t%s\t%s\t%s", bed.chrom.c_str(), start, end, bed.name.c_str(),
-                                                    bed.score.c_str(), bed.strand.c_str());
-
-                vector<string>::const_iterator othIt = bed.otherFields.begin();
-                vector<string>::const_iterator othEnd = bed.otherFields.end();
-                for ( ; othIt != othEnd; ++othIt) {
-                    printf("\t%s", othIt->c_str());
-                }
-                printf("\n");
-            }
-        }
-        // VCF
-        else if (_isGff == false && _isVcf == true) {
-            printf ("%s\t%d\t", bed.chrom.c_str(), start+1);
-
-            vector<string>::const_iterator othIt = bed.otherFields.begin();
-            vector<string>::const_iterator othEnd = bed.otherFields.end();
-            for ( ; othIt != othEnd; ++othIt) {
-                printf("%s\t", othIt->c_str());
-            }
-            printf("\n");
-        }
-        // GFF
-        else if (_isGff == true) {
-            // "GFF-8"
-            if (this->bedType == 8) {
-                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
-                                                                 bed.name.c_str(), start+1, end,
-                                                                 bed.score.c_str(), bed.strand.c_str(),
-                                                                 bed.otherFields[1].c_str());
-            }
-            // "GFF-9"
-            else if (this->bedType == 9) {
-                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
-                                                                 bed.name.c_str(), start+1, end,
-                                                                 bed.score.c_str(), bed.strand.c_str(),
-                                                                 bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
-            }
-        }
-    }
-
-
-
-    /*
-        reportBedRangeNewLine
-
-        Writes a custom start->end for a BED entry
-        with a NEWLINE at the end of the line.
-
-        Works for BED3 - BED6.
-    */
-    template <typename T>
-    inline void reportBedRangeTab(const T &bed, CHRPOS start, CHRPOS end) {
-        
-        // if it is azeroLength feature, we need to
-        // correct the start and end coords to what they were
-        // in the original file
-        if (bed.zeroLength) {
-            start = bed.start + 1;
-            end   = bed.end - 1;
-        }
-        // BED
-        if (_isGff == false && _isVcf == false) {
-            if (this->bedType == 3) {
-                printf ("%s\t%d\t%d\t", bed.chrom.c_str(), start, end);
-            }
-            else if (this->bedType == 4) {
-                printf ("%s\t%d\t%d\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str());
-            }
-            else if (this->bedType == 5) {
-                printf ("%s\t%d\t%d\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
-                                                bed.score.c_str());
-            }
-            else if (this->bedType == 6) {
-                printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
-                                                    bed.score.c_str(), bed.strand.c_str());
-            }
-            else if (this->bedType > 6) {
-                printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(),
-                                                    bed.score.c_str(), bed.strand.c_str());
-
-                vector<string>::const_iterator othIt = bed.otherFields.begin();
-                vector<string>::const_iterator othEnd = bed.otherFields.end();
-                for ( ; othIt != othEnd; ++othIt) {
-                    printf("%s\t", othIt->c_str());
-                }
-            }
-        }
-        // VCF
-        else if (_isGff == false && _isVcf == true) {
-            printf ("%s\t%d\t", bed.chrom.c_str(), bed.start+1);
-
-            vector<string>::const_iterator othIt = bed.otherFields.begin();
-            vector<string>::const_iterator othEnd = bed.otherFields.end();
-            for ( ; othIt != othEnd; ++othIt) {
-                printf("%s\t", othIt->c_str());
-            }
-        }
-        // GFF
-        else if (_isGff == true) {
-            // "GFF-8"
-            if (this->bedType == 8) {
-                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
-                                                             bed.name.c_str(), start+1, end,
-                                                             bed.score.c_str(), bed.strand.c_str(),
-                                                             bed.otherFields[1].c_str());
-            }
-            // "GFF-9"
-            else if (this->bedType == 9) {
-                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(),
-                                                             bed.name.c_str(), start+1, end,
-                                                             bed.score.c_str(), bed.strand.c_str(),
-                                                             bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
-            }
-        }
-    }
-
-
-
-    /*
-        reportBedRangeTab
-
-        Writes a custom start->end for a BED entry
-        with a TAB at the end of the line.
-
-        Works for BED3 - BED6.
-    */
-    template <typename T>
-    inline void reportBedRangeNewLine(const T &bed, CHRPOS start, CHRPOS end) {
-        
-        // if it is azeroLength feature, we need to
-        // correct the start and end coords to what they were
-        // in the original file
-        if (bed.zeroLength) {
-            start = bed.start + 1;
-            end   = bed.end - 1;
-        }
-        // BED
-        if (_isGff == false && _isVcf == false) {
-            if (this->bedType == 3) {
-                printf ("%s\t%d\t%d\n", bed.chrom.c_str(), start, end);
-            }
-            else if (this->bedType == 4) {
-                printf ("%s\t%d\t%d\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str());
-            }
-            else if (this->bedType == 5) {
-                printf ("%s\t%d\t%d\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(),
-                                                bed.score.c_str());
-            }
-            else if (this->bedType == 6) {
-                printf ("%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(),
-                                                    bed.score.c_str(), bed.strand.c_str());
-            }
-            else if (this->bedType > 6) {
-                printf ("%s\t%d\t%d\t%s\t%s\t%s", bed.chrom.c_str(), start, end, bed.name.c_str(),
-                                                    bed.score.c_str(), bed.strand.c_str());
-
-                vector<string>::const_iterator othIt = bed.otherFields.begin();
-                vector<string>::const_iterator othEnd = bed.otherFields.end();
-                for ( ; othIt != othEnd; ++othIt) {
-                    printf("\t%s", othIt->c_str());
-                }
-                printf("\n");
-            }
-        }
-        // VCF
-        else if (_isGff == false && _isVcf == true) {
-            printf ("%s\t%d\t", bed.chrom.c_str(), bed.start+1);
-
-            vector<string>::const_iterator othIt = bed.otherFields.begin();
-            vector<string>::const_iterator othEnd = bed.otherFields.end();
-            for ( ; othIt != othEnd; ++othIt) {
-                printf("%s\t", othIt->c_str());
-            }
-            printf("\n");
-        }
-        // GFF
-        else if (_isGff == true) {
-            // "GFF-9"
-            if (this->bedType == 8) {
-                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
-                                                             bed.name.c_str(), start+1, end,
-                                                             bed.score.c_str(), bed.strand.c_str(),
-                                                             bed.otherFields[1].c_str());
-            }
-            // "GFF-8"
-            else if (this->bedType == 9) {
-                printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(),
-                                                             bed.name.c_str(), start+1, end,
-                                                             bed.score.c_str(), bed.strand.c_str(),
-                                                             bed.otherFields[1].c_str(), bed.otherFields[2].c_str());
-            }
-        }
-    }
-
-
-    /*
-        reportNullBedTab
-    */
-    void reportNullBedTab() {
-
-        if (_isGff == false && _isVcf == false) {
-            if (this->bedType == 3) {
-                printf (".\t-1\t-1\t");
-            }
-            else if (this->bedType == 4) {
-                printf (".\t-1\t-1\t.\t");
-            }
-            else if (this->bedType == 5) {
-                printf (".\t-1\t-1\t.\t-1\t");
-            }
-            else if (this->bedType == 6) {
-                printf (".\t-1\t-1\t.\t-1\t.\t");
-            }
-            else if (this->bedType > 6) {
-                printf (".\t-1\t-1\t.\t-1\t.\t");
-                for (unsigned int i = 6; i < this->bedType; ++i) {
-                    printf(".\t");
-                }
-            }
-        }
-        else if (_isGff == true && _isVcf == false) {
-            if (this->bedType == 8) {
-                printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t");
-            }
-            else if (this->bedType == 9) {
-                printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t.\t");
-            }
-        }
-    }
-
-
-    /*
-        reportNullBedTab
-    */
-    void reportNullBedNewLine() {
-
-        if (_isGff == false && _isVcf == false) {
-            if (this->bedType == 3) {
-                printf (".\t-1\t-1\n");
-            }
-            else if (this->bedType == 4) {
-                printf (".\t-1\t-1\t.\n");
-            }
-            else if (this->bedType == 5) {
-                printf (".\t-1\t-1\t.\t-1\n");
-            }
-            else if (this->bedType == 6) {
-                printf (".\t-1\t-1\t.\t-1\t.\n");
-            }
-            else if (this->bedType > 6) {
-                printf (".\t-1\t-1\t.\t-1\t.");
-                for (unsigned int i = 6; i < this->bedType; ++i) {
-                    printf("\t.");
-                }
-                printf("\n");
-            }
-        }
-        else if (_isGff == true && _isVcf == false) {
-            if (this->bedType == 8) {
-                printf (".\t.\t.\t-1\t-1\t-1\t.\t.\n");
-            }
-            else if (this->bedType == 9) {
-                printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t.\n");
-            }
-        }
-    }
-
-
-};
-
-#endif /* BEDFILE_H */