Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h @ 1:bec36315bd12 default tip
Deleted selected files
author | aaronquinlan |
---|---|
date | Sat, 19 Nov 2011 14:17:03 -0500 |
parents | dfcd8b6c1bda |
children |
line wrap: on
line diff
--- a/BEDTools-Version-2.14.3/src/utils/bedFile/bedFile.h Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1167 +0,0 @@ -/***************************************************************************** - bedFile.h - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licensed under the GNU General Public License 2.0 license. -******************************************************************************/ -#ifndef BEDFILE_H -#define BEDFILE_H - -// "local" includes -#include "gzstream.h" -#include "lineFileUtilities.h" -#include "fileType.h" - -// standard includes -#include <vector> -#include <map> -#include <set> -#include <string> -#include <iostream> -#include <fstream> -#include <sstream> -#include <cstring> -#include <algorithm> -#include <limits.h> -#include <stdint.h> -#include <cstdio> -//#include <tr1/unordered_map> // Experimental. -using namespace std; - - -//************************************************* -// Data type tydedef -//************************************************* -typedef uint32_t CHRPOS; -typedef uint16_t BINLEVEL; -typedef uint32_t BIN; -typedef uint16_t USHORT; -typedef uint32_t UINT; - -//************************************************* -// Genome binning constants -//************************************************* - -const BIN _numBins = 37450; -const BINLEVEL _binLevels = 7; - -// bins range in size from 16kb to 512Mb -// Bin 0 spans 512Mbp, # Level 1 -// Bins 1-8 span 64Mbp, # Level 2 -// Bins 9-72 span 8Mbp, # Level 3 -// Bins 73-584 span 1Mbp # Level 4 -// Bins 585-4680 span 128Kbp # Level 5 -// Bins 4681-37449 span 16Kbp # Level 6 -const BIN _binOffsetsExtended[] = {32678+4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0}; -//const BIN _binOffsetsExtended[] = {4096+512+64+8+1, 4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0}; - -const USHORT _binFirstShift = 14; /* How much to shift to get to finest bin. */ -const USHORT _binNextShift = 3; /* How much to shift to get to next larger bin. */ - - -//************************************************* -// Common data structures -//************************************************* - -struct DEPTH { - UINT starts; - UINT ends; -}; - - -/* - Structure for regular BED records -*/ -struct BED { - - // Regular BED fields - string chrom; - CHRPOS start; - CHRPOS end; - string name; - string score; - string strand; - - // Add'l fields for BED12 and/or custom BED annotations - vector<string> otherFields; - - // experimental fields for the FJOIN approach. - bool zeroLength; - bool added; - bool finished; - // list of hits from another file. - vector<BED> overlaps; - -public: - // constructors - - // Null - BED() - : chrom(""), - start(0), - end(0), - name(""), - score(""), - strand(""), - otherFields(), - zeroLength(false), - added(false), - finished(false), - overlaps() - {} - - // BED3 - BED(string chrom, CHRPOS start, CHRPOS end) - : chrom(chrom), - start(start), - end(end), - name(""), - score(""), - strand(""), - otherFields(), - zeroLength(false), - added(false), - finished(false), - overlaps() - {} - - // BED4 - BED(string chrom, CHRPOS start, CHRPOS end, string strand) - : chrom(chrom), - start(start), - end(end), - name(""), - score(""), - strand(strand), - otherFields(), - zeroLength(false), - added(false), - finished(false), - overlaps() - {} - - // BED6 - BED(string chrom, CHRPOS start, CHRPOS end, string name, - string score, string strand) - : chrom(chrom), - start(start), - end(end), - name(name), - score(score), - strand(strand), - otherFields(), - zeroLength(false), - added(false), - finished(false), - overlaps() - {} - - // BEDALL - BED(string chrom, CHRPOS start, CHRPOS end, string name, - string score, string strand, vector<string> otherFields) - : chrom(chrom), - start(start), - end(end), - name(name), - score(score), - strand(strand), - otherFields(otherFields), - zeroLength(false), - added(false), - finished(false), - overlaps() - {} - - int size() { - return end-start; - } - -}; // BED - - -/* - Structure for each end of a paired BED record - mate points to the other end. -*/ -struct MATE { - BED bed; - int lineNum; - MATE *mate; -}; - - -/* - Structure for regular BED COVERAGE records -*/ -struct BEDCOV { - - string chrom; - - // Regular BED fields - CHRPOS start; - CHRPOS end; - string name; - string score; - string strand; - - // Add'l fields for BED12 and/or custom BED annotations - vector<string> otherFields; - - // flag a zero-length feature - bool zeroLength; - - // Additional fields specific to computing coverage - map<unsigned int, DEPTH> depthMap; - unsigned int count; - CHRPOS minOverlapStart; - - - public: - // constructors - // Null - BEDCOV() - : chrom(""), - start(0), - end(0), - name(""), - score(""), - strand(""), - otherFields(), - zeroLength(false), - depthMap(), - count(0), - minOverlapStart(0) - {} -}; - - -/* - Structure for BED COVERAGE records having lists of - multiple coverages -*/ -struct BEDCOVLIST { - - // Regular BED fields - string chrom; - CHRPOS start; - CHRPOS end; - string name; - string score; - string strand; - - // Add'l fields for BED12 and/or custom BED annotations - vector<string> otherFields; - - // flag a zero-length feature - bool zeroLength; - - // Additional fields specific to computing coverage - vector< map<unsigned int, DEPTH> > depthMapList; - vector<unsigned int> counts; - vector<CHRPOS> minOverlapStarts; - - - public: - // constructors - // Null - BEDCOVLIST() - : chrom(""), - start(0), - end(0), - name(""), - score(""), - strand(""), - otherFields(), - zeroLength(false), - depthMapList(), - counts(0), - minOverlapStarts(0) - {} -}; - - -// enum to flag the state of a given line in a BED file. -enum BedLineStatus -{ - BED_INVALID = -1, - BED_HEADER = 0, - BED_BLANK = 1, - BED_VALID = 2 -}; - -// enum to indicate the type of file we are dealing with -enum FileType -{ - BED_FILETYPE, - GFF_FILETYPE, - VCF_FILETYPE -}; - -//************************************************* -// Data structure typedefs -//************************************************* -typedef vector<BED> bedVector; -typedef vector<BEDCOV> bedCovVector; -typedef vector<MATE> mateVector; -typedef vector<BEDCOVLIST> bedCovListVector; - -typedef map<BIN, bedVector, std::less<BIN> > binsToBeds; -typedef map<BIN, bedCovVector, std::less<BIN> > binsToBedCovs; -typedef map<BIN, mateVector, std::less<BIN> > binsToMates; -typedef map<BIN, bedCovListVector, std::less<BIN> > binsToBedCovLists; - -typedef map<string, binsToBeds, std::less<string> > masterBedMap; -typedef map<string, binsToBedCovs, std::less<string> > masterBedCovMap; -typedef map<string, binsToMates, std::less<string> > masterMateMap; -typedef map<string, binsToBedCovLists, std::less<string> > masterBedCovListMap; -typedef map<string, bedVector, std::less<string> > masterBedMapNoBin; - - -// EXPERIMENTAL - wait for TR1 -// typedef vector<BED> bedVector; -// typedef vector<BEDCOV> bedCovVector; -// -// typedef tr1::unordered_map<BIN, bedVector> binsToBeds; -// typedef tr1::unordered_map<BIN, bedCovVector> binsToBedCovs; -// -// typedef tr1::unordered_map<string, binsToBeds> masterBedMap; -// typedef tr1::unordered_map<string, binsToBedCovs> masterBedCovMap; -// typedef tr1::unordered_map<string, bedVector> masterBedMapNoBin; - - - -// return the genome "bin" for a feature with this start and end -inline -BIN getBin(CHRPOS start, CHRPOS end) { - --end; - start >>= _binFirstShift; - end >>= _binFirstShift; - - for (register short i = 0; i < _binLevels; ++i) { - if (start == end) return _binOffsetsExtended[i] + start; - start >>= _binNextShift; - end >>= _binNextShift; - } - cerr << "start " << start << ", end " << end << " out of range in findBin (max is 512M)" << endl; - return 0; -} - -/**************************************************** -// isInteger(s): Tests if string s is a valid integer -*****************************************************/ -inline bool isInteger(const std::string& s) { - int len = s.length(); - for (int i = 0; i < len; i++) { - if (!std::isdigit(s[i])) return false; - } - return true; -} - - -// return the amount of overlap between two features. Negative if none and the the -// number of negative bases is the distance between the two. -inline -int overlaps(CHRPOS aS, CHRPOS aE, CHRPOS bS, CHRPOS bE) { - return min(aE, bE) - max(aS, bS); -} - -// is A after (to the right of) B? -inline -bool after(const BED &a, const BED &b) { - return (a.start >= b.end); -} - - -// Ancillary functions -void splitBedIntoBlocks(const BED &bed, int lineNum, bedVector &bedBlocks); - - -// BED Sorting Methods -bool sortByChrom(const BED &a, const BED &b); -bool sortByStart(const BED &a, const BED &b); -bool sortBySizeAsc(const BED &a, const BED &b); -bool sortBySizeDesc(const BED &a, const BED &b); -bool sortByScoreAsc(const BED &a, const BED &b); -bool sortByScoreDesc(const BED &a, const BED &b); -bool byChromThenStart(BED const &a, BED const &b); - - - -//************************************************ -// BedFile Class methods and elements -//************************************************ -class BedFile { - -public: - - // Constructor - BedFile(string &); - - // Destructor - ~BedFile(void); - - // Open a BED file for reading (creates an istream pointer) - void Open(void); - - // Rewind the pointer back to the beginning of the file - void Rewind(void); - - // Jump to a specific byte in the file - void Seek(unsigned long offset); - - bool Empty(); - - // Close an opened BED file. - void Close(void); - - // Get the next BED entry in an opened BED file. - BedLineStatus GetNextBed (BED &bed, int &lineNum, bool forceSorted = false); - - // Returns the next MERGED (i.e., non-overlapping) interval in an opened BED file - // NOTE: assumes input file is sorted by chrom then start - bool GetNextMergedBed(BED &merged_bed, int &lineNum); - - // load a BED file into a map keyed by chrom, then bin. value is vector of BEDs - void loadBedFileIntoMap(); - - // load a BED file into a map keyed by chrom, then bin. value is vector of BEDCOVs - void loadBedCovFileIntoMap(); - - // load a BED file into a map keyed by chrom, then bin. value is vector of BEDCOVLISTs - void loadBedCovListFileIntoMap(); - - // load a BED file into a map keyed by chrom. value is vector of BEDs - void loadBedFileIntoMapNoBin(); - - // Given a chrom, start, end and strand for a single feature, - // search for all overlapping features in another BED file. - // Searches through each relevant genome bin on the same chromosome - // as the single feature. Note: Adapted from kent source "binKeeperFind" - void FindOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, vector<BED> &hits, bool sameStrand, bool diffStrand); - - // return true if at least one overlap was found. otherwise, return false. - bool FindOneOrMoreOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, - bool sameStrand, bool diffStrand, float overlapFraction = 0.0); - - // return true if at least one __reciprocal__ overlap was found. otherwise, return false. - bool FindOneOrMoreReciprocalOverlapsPerBin(string chrom, CHRPOS start, CHRPOS end, string strand, - bool sameStrand, bool diffStrand, float overlapFraction = 0.0); - - // Given a chrom, start, end and strand for a single feature, - // increment a the number of hits for each feature in B file - // that the feature overlaps - void countHits(const BED &a, bool sameStrand = false, bool diffStrand = false, bool countsOnly = false); - - // same as above, but has special logic that processes a set of - // BED "blocks" from a single entry so as to avoid over-counting - // each "block" of a single BAM/BED12 as distinct coverage. That is, - // if one read has four block, we only want to count the coverage as - // coming from one read, not four. - void countSplitHits(const vector<BED> &bedBlock, bool sameStrand = false, bool diffStrand = false, bool countsOnly = false); - - // Given a chrom, start, end and strand for a single feature, - // increment a the number of hits for each feature in B file - // that the feature overlaps - void countListHits(const BED &a, int index, bool sameStrand, bool diffStrand); - - // the bedfile with which this instance is associated - string bedFile; - unsigned int bedType; // 3-6, 12 for BED - // 9 for GFF - bool isZeroBased; - - // Main data structires used by BEDTools - masterBedCovMap bedCovMap; - masterBedCovListMap bedCovListMap; - masterBedMap bedMap; - masterBedMapNoBin bedMapNoBin; - -private: - - // data - bool _isGff; - bool _isVcf; - bool _typeIsKnown; // do we know the type? (i.e., BED, GFF, VCF) - FileType _fileType; // what is the file type? (BED? GFF? VCF?) - istream *_bedStream; - string _bedLine; - vector<string> _bedFields; - int _merged_start; - int _merged_end; - string _merged_chrom; - int _prev_start; - string _prev_chrom; - - void setZeroBased(bool zeroBased); - void setGff (bool isGff); - void setVcf (bool isVcf); - void setFileType (FileType type); - void setBedType (int colNums); - - /****************************************************** - Private definitions to circumvent linker issues with - templated member functions. - *******************************************************/ - - /* - parseLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.) - */ - template <typename T> - inline BedLineStatus parseLine (T &bed, const vector<string> &lineVector, int &lineNum) { - - //char *p2End, *p3End, *p4End, *p5End; - //long l2, l3, l4, l5; - unsigned int numFields = lineVector.size(); - - // bail out if we have a blank line - if (numFields == 0) { - return BED_BLANK; - } - - if ((lineVector[0].find("track") == string::npos) && (lineVector[0].find("browser") == string::npos) && (lineVector[0].find("#") == string::npos) ) { - - if (numFields >= 3) { - // line parsing for all lines after the first non-header line - if (_typeIsKnown == true) { - switch(_fileType) { - case BED_FILETYPE: - if (parseBedLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID; - case VCF_FILETYPE: - if (parseVcfLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID; - case GFF_FILETYPE: - if (parseGffLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID; - default: - printf("ERROR: file type encountered. Exiting\n"); - exit(1); - } - } - // line parsing for first non-header line: figure out file contents - else { - // it's BED format if columns 2 and 3 are integers - if (isInteger(lineVector[1]) && isInteger(lineVector[2])) { - setGff(false); - setZeroBased(true); - setFileType(BED_FILETYPE); - setBedType(numFields); // we now expect numFields columns in each line - if (parseBedLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID; - } - // it's VCF, assuming the second column is numeric and there are at least 8 fields. - else if (isInteger(lineVector[1]) && numFields >= 8) { - setGff(false); - setVcf(true); - setZeroBased(false); - setFileType(VCF_FILETYPE); - setBedType(numFields); // we now expect numFields columns in each line - if (parseVcfLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID; - } - // it's GFF, assuming columns columns 4 and 5 are numeric and we have 9 fields total. - else if ((numFields >= 8) && isInteger(lineVector[3]) && isInteger(lineVector[4])) { - setGff(true); - setZeroBased(false); - setFileType(GFF_FILETYPE); - setBedType(numFields); // we now expect numFields columns in each line - if (parseGffLine(bed, lineVector, lineNum, numFields) == true) return BED_VALID; - } - else { - cerr << "Unexpected file format. Please use tab-delimited BED, GFF, or VCF. " << - "Perhaps you have non-integer starts or ends at line " << lineNum << "?" << endl; - exit(1); - } - } - } - else { - cerr << "It looks as though you have less than 3 columns at line: " << lineNum << ". Are you sure your files are tab-delimited?" << endl; - exit(1); - } - } - else { - lineNum--; - return BED_HEADER; - } - // default - return BED_INVALID; - } - - - /* - parseBedLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.) - */ - template <typename T> - inline bool parseBedLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) { - - // process as long as the number of fields in this - // line matches what we expect for this file. - if (numFields == this->bedType) { - bed.chrom = lineVector[0]; - int i; - i = atoi(lineVector[1].c_str()); - if (i<0) { - cerr << "Error: malformed BED entry at line " << lineNum << ". Start Coordinate detected that is < 0. Exiting." << endl; - exit(1); - } - bed.start = (CHRPOS)i; - i = atoi(lineVector[2].c_str()); - if (i<0) { - cerr << "Error: malformed BED entry at line " << lineNum << ". End Coordinate detected that is < 0. Exiting." << endl; - exit(1); - } - bed.end = (CHRPOS)i; - - // handle starts == end (e.g., insertions in reference genome) - if (bed.start == bed.end) { - bed.start--; - bed.end++; - bed.zeroLength = true; - } - - if (this->bedType == 4) { - bed.name = lineVector[3]; - } - else if (this->bedType == 5) { - bed.name = lineVector[3]; - bed.score = lineVector[4]; - } - else if (this->bedType == 6) { - bed.name = lineVector[3]; - bed.score = lineVector[4]; - bed.strand = lineVector[5]; - } - else if (this->bedType > 6) { - bed.name = lineVector[3]; - bed.score = lineVector[4]; - bed.strand = lineVector[5]; - for (unsigned int i = 6; i < lineVector.size(); ++i) { - bed.otherFields.push_back(lineVector[i]); - } - } - else if (this->bedType != 3) { - cerr << "Error: unexpected number of fields at line: " << lineNum - << ". Verify that your files are TAB-delimited. Exiting..." << endl; - exit(1); - } - - // sanity checks. - if (bed.start <= bed.end) { - return true; - } - else { - cerr << "Error: malformed BED entry at line " << lineNum << ". Start was greater than end. Exiting." << endl; - exit(1); - } - } - else if (numFields == 1) { - cerr << "Only one BED field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl; - exit(1); - } - else if ((numFields != this->bedType) && (numFields != 0)) { - cerr << "Differing number of BED fields encountered at line: " << lineNum << ". Exiting..." << endl; - exit(1); - } - else if ((numFields < 3) && (numFields != 0)) { - cerr << "TAB delimited BED file with at least 3 fields (chrom, start, end) is required at line: "<< lineNum << ". Exiting..." << endl; - exit(1); - } - return false; - } - - - /* - parseVcfLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.) - */ - template <typename T> - inline bool parseVcfLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) { - if (numFields == this->bedType) { - bed.chrom = lineVector[0]; - bed.start = atoi(lineVector[1].c_str()) - 1; // VCF is one-based - bed.end = bed.start + lineVector[3].size(); // VCF 4.0 stores the size of the affected REF allele. - bed.strand = "+"; - // construct the name from the ref and alt alleles. - // if it's an annotated variant, add the rsId as well. - bed.name = lineVector[3] + "/" + lineVector[4]; - if (lineVector[2] != ".") { - bed.name += "_" + lineVector[2]; - } - - if (this->bedType > 2) { - for (unsigned int i = 2; i < numFields; ++i) - bed.otherFields.push_back(lineVector[i]); - } - - if ((bed.start <= bed.end) && (bed.start >= 0) && (bed.end >= 0)) { - return true; - } - else if (bed.start > bed.end) { - cerr << "Error: malformed VCF entry at line " << lineNum << ". Start was greater than end. Exiting." << endl; - exit(1); - } - else if ( (bed.start < 0) || (bed.end < 0) ) { - cerr << "Error: malformed VCF entry at line " << lineNum << ". Coordinate detected that is < 0. Exiting." << endl; - exit(1); - } - } - else if (numFields == 1) { - cerr << "Only one VCF field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl; - exit(1); - } - else if ((numFields != this->bedType) && (numFields != 0)) { - cerr << "Differing number of VCF fields encountered at line: " << lineNum << ". Exiting..." << endl; - exit(1); - } - else if ((numFields < 2) && (numFields != 0)) { - cerr << "TAB delimited VCF file with at least 2 fields (chrom, pos) is required at line: "<< lineNum << ". Exiting..." << endl; - exit(1); - } - return false; - } - - - - /* - parseGffLine: converts a lineVector into either BED or BEDCOV (templated, hence in header to avoid linker issues.) - */ - template <typename T> - inline bool parseGffLine (T &bed, const vector<string> &lineVector, int lineNum, unsigned int numFields) { - if (numFields == this->bedType) { - if (this->bedType >= 8 && _isGff) { - bed.chrom = lineVector[0]; - if (isInteger(lineVector[3])) - bed.start = atoi(lineVector[3].c_str()); - if (isInteger(lineVector[4])) - bed.end = atoi(lineVector[4].c_str()); - bed.name = lineVector[2]; - bed.score = lineVector[5]; - bed.strand = lineVector[6].c_str(); - bed.otherFields.push_back(lineVector[1]); // add GFF "source". unused in BED - bed.otherFields.push_back(lineVector[7]); // add GFF "fname". unused in BED - // handle the optional 9th field. - if (this->bedType == 9) - bed.otherFields.push_back(lineVector[8]); // add GFF "group". unused in BED - bed.start--; - } - else { - cerr << "Error: unexpected number of fields at line: " << lineNum << - ". Verify that your files are TAB-delimited and that your GFF file has 8 or 9 fields. Exiting..." << endl; - exit(1); - } - if (bed.start > bed.end) { - cerr << "Error: malformed GFF entry at line " << lineNum << ". Start was greater than end. Exiting." << endl; - exit(1); - } - if ( (bed.start < 0) || (bed.end < 0) ) { - cerr << "Error: malformed GFF entry at line " << lineNum << ". Coordinate detected that is < 1. Exiting." << endl; - exit(1); - } - return true; - } - else if (numFields == 1) { - cerr << "Only one GFF field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl; - exit(1); - } - else if ((numFields != this->bedType) && (numFields != 0)) { - cerr << "Differing number of GFF fields encountered at line: " << lineNum << ". Exiting..." << endl; - exit(1); - } - else if ((numFields < 8) && (numFields != 0)) { - cerr << "TAB delimited GFF file with 8 or 9 fields is required at line: "<< lineNum << ". Exiting..." << endl; - exit(1); - } - return false; - } - - -public: - - /* - reportBedTab - - Writes the _original_ BED entry with a TAB - at the end of the line. - Works for BED3 - BED6. - */ - template <typename T> - inline void reportBedTab(const T &bed) { - - // if it is azeroLength feature, we need to - // correct the start and end coords to what they were - // in the original file - CHRPOS start = bed.start; - CHRPOS end = bed.end; - if (bed.zeroLength) { - if (_isGff == false) - start++; - end--; - } - - // BED - if (_isGff == false && _isVcf == false) { - if (this->bedType == 3) { - printf ("%s\t%d\t%d\t", bed.chrom.c_str(), start, end); - } - else if (this->bedType == 4) { - printf ("%s\t%d\t%d\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str()); - } - else if (this->bedType == 5) { - printf ("%s\t%d\t%d\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(), - bed.score.c_str()); - } - else if (this->bedType == 6) { - printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(), - bed.score.c_str(), bed.strand.c_str()); - } - else if (this->bedType > 6) { - printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(), - bed.score.c_str(), bed.strand.c_str()); - - vector<string>::const_iterator othIt = bed.otherFields.begin(); - vector<string>::const_iterator othEnd = bed.otherFields.end(); - for ( ; othIt != othEnd; ++othIt) { - printf("%s\t", othIt->c_str()); - } - } - } - // VCF - else if (_isGff == false && _isVcf == true) { - printf ("%s\t%d\t", bed.chrom.c_str(), start+1); - - vector<string>::const_iterator othIt = bed.otherFields.begin(); - vector<string>::const_iterator othEnd = bed.otherFields.end(); - for ( ; othIt != othEnd; ++othIt) { - printf("%s\t", othIt->c_str()); - } - } - // GFF - else if (_isGff == true) { - // "GFF-8" - if (this->bedType == 8) { - printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(), - bed.name.c_str(), start+1, end, - bed.score.c_str(), bed.strand.c_str(), - bed.otherFields[1].c_str()); - } - // "GFF-9" - else if (this->bedType == 9) { - printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(), - bed.name.c_str(), start+1, end, - bed.score.c_str(), bed.strand.c_str(), - bed.otherFields[1].c_str(), bed.otherFields[2].c_str()); - } - } - } - - - - /* - reportBedNewLine - - Writes the _original_ BED entry with a NEWLINE - at the end of the line. - Works for BED3 - BED6. - */ - template <typename T> - inline void reportBedNewLine(const T &bed) { - - // if it is azeroLength feature, we need to - // correct the start and end coords to what they were - // in the original file - CHRPOS start = bed.start; - CHRPOS end = bed.end; - if (bed.zeroLength) { - if (_isGff == false) - start++; - end--; - } - - //BED - if (_isGff == false && _isVcf == false) { - if (this->bedType == 3) { - printf ("%s\t%d\t%d\n", bed.chrom.c_str(), start, end); - } - else if (this->bedType == 4) { - printf ("%s\t%d\t%d\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str()); - } - else if (this->bedType == 5) { - printf ("%s\t%d\t%d\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(), - bed.score.c_str()); - } - else if (this->bedType == 6) { - printf ("%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(), - bed.score.c_str(), bed.strand.c_str()); - } - else if (this->bedType > 6) { - printf ("%s\t%d\t%d\t%s\t%s\t%s", bed.chrom.c_str(), start, end, bed.name.c_str(), - bed.score.c_str(), bed.strand.c_str()); - - vector<string>::const_iterator othIt = bed.otherFields.begin(); - vector<string>::const_iterator othEnd = bed.otherFields.end(); - for ( ; othIt != othEnd; ++othIt) { - printf("\t%s", othIt->c_str()); - } - printf("\n"); - } - } - // VCF - else if (_isGff == false && _isVcf == true) { - printf ("%s\t%d\t", bed.chrom.c_str(), start+1); - - vector<string>::const_iterator othIt = bed.otherFields.begin(); - vector<string>::const_iterator othEnd = bed.otherFields.end(); - for ( ; othIt != othEnd; ++othIt) { - printf("%s\t", othIt->c_str()); - } - printf("\n"); - } - // GFF - else if (_isGff == true) { - // "GFF-8" - if (this->bedType == 8) { - printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(), - bed.name.c_str(), start+1, end, - bed.score.c_str(), bed.strand.c_str(), - bed.otherFields[1].c_str()); - } - // "GFF-9" - else if (this->bedType == 9) { - printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(), - bed.name.c_str(), start+1, end, - bed.score.c_str(), bed.strand.c_str(), - bed.otherFields[1].c_str(), bed.otherFields[2].c_str()); - } - } - } - - - - /* - reportBedRangeNewLine - - Writes a custom start->end for a BED entry - with a NEWLINE at the end of the line. - - Works for BED3 - BED6. - */ - template <typename T> - inline void reportBedRangeTab(const T &bed, CHRPOS start, CHRPOS end) { - - // if it is azeroLength feature, we need to - // correct the start and end coords to what they were - // in the original file - if (bed.zeroLength) { - start = bed.start + 1; - end = bed.end - 1; - } - // BED - if (_isGff == false && _isVcf == false) { - if (this->bedType == 3) { - printf ("%s\t%d\t%d\t", bed.chrom.c_str(), start, end); - } - else if (this->bedType == 4) { - printf ("%s\t%d\t%d\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str()); - } - else if (this->bedType == 5) { - printf ("%s\t%d\t%d\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(), - bed.score.c_str()); - } - else if (this->bedType == 6) { - printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(), - bed.score.c_str(), bed.strand.c_str()); - } - else if (this->bedType > 6) { - printf ("%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), start, end, bed.name.c_str(), - bed.score.c_str(), bed.strand.c_str()); - - vector<string>::const_iterator othIt = bed.otherFields.begin(); - vector<string>::const_iterator othEnd = bed.otherFields.end(); - for ( ; othIt != othEnd; ++othIt) { - printf("%s\t", othIt->c_str()); - } - } - } - // VCF - else if (_isGff == false && _isVcf == true) { - printf ("%s\t%d\t", bed.chrom.c_str(), bed.start+1); - - vector<string>::const_iterator othIt = bed.otherFields.begin(); - vector<string>::const_iterator othEnd = bed.otherFields.end(); - for ( ; othIt != othEnd; ++othIt) { - printf("%s\t", othIt->c_str()); - } - } - // GFF - else if (_isGff == true) { - // "GFF-8" - if (this->bedType == 8) { - printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(), - bed.name.c_str(), start+1, end, - bed.score.c_str(), bed.strand.c_str(), - bed.otherFields[1].c_str()); - } - // "GFF-9" - else if (this->bedType == 9) { - printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", bed.chrom.c_str(), bed.otherFields[0].c_str(), - bed.name.c_str(), start+1, end, - bed.score.c_str(), bed.strand.c_str(), - bed.otherFields[1].c_str(), bed.otherFields[2].c_str()); - } - } - } - - - - /* - reportBedRangeTab - - Writes a custom start->end for a BED entry - with a TAB at the end of the line. - - Works for BED3 - BED6. - */ - template <typename T> - inline void reportBedRangeNewLine(const T &bed, CHRPOS start, CHRPOS end) { - - // if it is azeroLength feature, we need to - // correct the start and end coords to what they were - // in the original file - if (bed.zeroLength) { - start = bed.start + 1; - end = bed.end - 1; - } - // BED - if (_isGff == false && _isVcf == false) { - if (this->bedType == 3) { - printf ("%s\t%d\t%d\n", bed.chrom.c_str(), start, end); - } - else if (this->bedType == 4) { - printf ("%s\t%d\t%d\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str()); - } - else if (this->bedType == 5) { - printf ("%s\t%d\t%d\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(), - bed.score.c_str()); - } - else if (this->bedType == 6) { - printf ("%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), start, end, bed.name.c_str(), - bed.score.c_str(), bed.strand.c_str()); - } - else if (this->bedType > 6) { - printf ("%s\t%d\t%d\t%s\t%s\t%s", bed.chrom.c_str(), start, end, bed.name.c_str(), - bed.score.c_str(), bed.strand.c_str()); - - vector<string>::const_iterator othIt = bed.otherFields.begin(); - vector<string>::const_iterator othEnd = bed.otherFields.end(); - for ( ; othIt != othEnd; ++othIt) { - printf("\t%s", othIt->c_str()); - } - printf("\n"); - } - } - // VCF - else if (_isGff == false && _isVcf == true) { - printf ("%s\t%d\t", bed.chrom.c_str(), bed.start+1); - - vector<string>::const_iterator othIt = bed.otherFields.begin(); - vector<string>::const_iterator othEnd = bed.otherFields.end(); - for ( ; othIt != othEnd; ++othIt) { - printf("%s\t", othIt->c_str()); - } - printf("\n"); - } - // GFF - else if (_isGff == true) { - // "GFF-9" - if (this->bedType == 8) { - printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(), - bed.name.c_str(), start+1, end, - bed.score.c_str(), bed.strand.c_str(), - bed.otherFields[1].c_str()); - } - // "GFF-8" - else if (this->bedType == 9) { - printf ("%s\t%s\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", bed.chrom.c_str(), bed.otherFields[0].c_str(), - bed.name.c_str(), start+1, end, - bed.score.c_str(), bed.strand.c_str(), - bed.otherFields[1].c_str(), bed.otherFields[2].c_str()); - } - } - } - - - /* - reportNullBedTab - */ - void reportNullBedTab() { - - if (_isGff == false && _isVcf == false) { - if (this->bedType == 3) { - printf (".\t-1\t-1\t"); - } - else if (this->bedType == 4) { - printf (".\t-1\t-1\t.\t"); - } - else if (this->bedType == 5) { - printf (".\t-1\t-1\t.\t-1\t"); - } - else if (this->bedType == 6) { - printf (".\t-1\t-1\t.\t-1\t.\t"); - } - else if (this->bedType > 6) { - printf (".\t-1\t-1\t.\t-1\t.\t"); - for (unsigned int i = 6; i < this->bedType; ++i) { - printf(".\t"); - } - } - } - else if (_isGff == true && _isVcf == false) { - if (this->bedType == 8) { - printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t"); - } - else if (this->bedType == 9) { - printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t.\t"); - } - } - } - - - /* - reportNullBedTab - */ - void reportNullBedNewLine() { - - if (_isGff == false && _isVcf == false) { - if (this->bedType == 3) { - printf (".\t-1\t-1\n"); - } - else if (this->bedType == 4) { - printf (".\t-1\t-1\t.\n"); - } - else if (this->bedType == 5) { - printf (".\t-1\t-1\t.\t-1\n"); - } - else if (this->bedType == 6) { - printf (".\t-1\t-1\t.\t-1\t.\n"); - } - else if (this->bedType > 6) { - printf (".\t-1\t-1\t.\t-1\t."); - for (unsigned int i = 6; i < this->bedType; ++i) { - printf("\t."); - } - printf("\n"); - } - } - else if (_isGff == true && _isVcf == false) { - if (this->bedType == 8) { - printf (".\t.\t.\t-1\t-1\t-1\t.\t.\n"); - } - else if (this->bedType == 9) { - printf (".\t.\t.\t-1\t-1\t-1\t.\t.\t.\n"); - } - } - } - - -}; - -#endif /* BEDFILE_H */