annotate BEDTools-Version-2.14.3/src/utils/bedFilePE/bedFilePE.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 // bedFilePE.cpp
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3 // BEDTools
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 // Created by Aaron Quinlan Spring 2009.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 // Copyright 2009 Aaron Quinlan. All rights reserved.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 // Summary: Contains common functions for finding BED overlaps.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9 //
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 // Acknowledgments: Much of the code herein is taken from Jim Kent's
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 // BED processing code. I am grateful for his elegant
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 // genome binning algorithm and therefore use it extensively.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15 #include "bedFilePE.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 // Constructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19 BedFilePE::BedFilePE(string &bedFile) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20 this->bedFile = bedFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 // Destructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 BedFilePE::~BedFilePE(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 void BedFilePE::Open(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 if (bedFile == "stdin" || bedFile == "-") {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29 _bedStream = &cin;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 _bedStream = new ifstream(bedFile.c_str(), ios::in);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34 if (isGzipFile(_bedStream) == true) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35 delete _bedStream;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36 _bedStream = new igzstream(bedFile.c_str(), ios::in);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38 // can we open the file?
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39 if ( !(_bedStream->good()) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40 cerr << "Error: The requested bed file (" << bedFile << ") could not be opened. Exiting!" << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41 exit (1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48 // Close the BEDPE file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49 void BedFilePE::Close(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50 if (bedFile != "stdin" && bedFile != "-") delete _bedStream;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54 BedLineStatus BedFilePE::GetNextBedPE (BEDPE &bedpe, int &lineNum) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56 // make sure there are still lines to process.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57 // if so, tokenize, validate and return the BEDPE entry.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58 if (_bedStream->good()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59 string bedPELine;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60 vector<string> bedPEFields;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61 bedPEFields.reserve(10);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63 // parse the bedStream pointer
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64 getline(*_bedStream, bedPELine);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65 lineNum++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67 // split into a string vector.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68 Tokenize(bedPELine,bedPEFields);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70 // load the BEDPE struct as long as it's a valid BEDPE entry.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71 return parseLine(bedpe, bedPEFields, lineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73 // default if file is closed or EOF
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74 return BED_INVALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79 reportBedPETab
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81 Writes the _original_ BED entry for A.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 Works for BEDPE only.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84 void BedFilePE::reportBedPETab(const BEDPE &a) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86 if (this->bedType == 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 printf("%s\t%d\t%d\t%s\t%d\t%d\t", a.chrom1.c_str(), a.start1, a.end1,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88 a.chrom2.c_str(), a.start2, a.end2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90 else if (this->bedType == 7) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t", a.chrom1.c_str(), a.start1, a.end1,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92 a.chrom2.c_str(), a.start2, a.end2,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 a.name.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95 else if (this->bedType == 8) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t", a.chrom1.c_str(), a.start1, a.end1,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97 a.chrom2.c_str(), a.start2, a.end2,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98 a.name.c_str(), a.score.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100 else if (this->bedType == 10) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s\t", a.chrom1.c_str(), a.start1, a.end1,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102 a.chrom2.c_str(), a.start2, a.end2,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
104 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
105 else if (this->bedType > 10) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
106 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", a.chrom1.c_str(), a.start1, a.end1,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
107 a.chrom2.c_str(), a.start2, a.end2,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
108 a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
109
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
110 vector<string>::const_iterator othIt = a.otherFields.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
111 vector<string>::const_iterator othEnd = a.otherFields.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
112 for ( ; othIt != othEnd; ++othIt) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
113 printf("\t%s", othIt->c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
114 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
115 printf("\t");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
116 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
117 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
118
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
119
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
120
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
121 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
122 reportBedPENewLine
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
123
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
124 Writes the _original_ BED entry for A.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
125 Works for BEDPE only.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
126 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
127 void BedFilePE::reportBedPENewLine(const BEDPE &a) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
128
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
129 if (this->bedType == 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
130 printf("%s\t%d\t%d\t%s\t%d\t%d\n", a.chrom1.c_str(), a.start1, a.end1,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
131 a.chrom2.c_str(), a.start2, a.end2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
132 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
133 else if (this->bedType == 7) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
134 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\n", a.chrom1.c_str(), a.start1, a.end1,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
135 a.chrom2.c_str(), a.start2, a.end2,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
136 a.name.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
137 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
138 else if (this->bedType == 8) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
139 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\n", a.chrom1.c_str(), a.start1, a.end1,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
140 a.chrom2.c_str(), a.start2, a.end2,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
141 a.name.c_str(), a.score.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
142 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
143 else if (this->bedType == 10) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
144 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s\n", a.chrom1.c_str(), a.start1, a.end1,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
145 a.chrom2.c_str(), a.start2, a.end2,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
146 a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
147 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
148 else if (this->bedType > 10) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
149 printf("%s\t%d\t%d\t%s\t%d\t%d\t%s\t%s\t%s\t%s", a.chrom1.c_str(), a.start1, a.end1,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
150 a.chrom2.c_str(), a.start2, a.end2,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
151 a.name.c_str(), a.score.c_str(), a.strand1.c_str(), a.strand2.c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
152
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
153 vector<string>::const_iterator othIt = a.otherFields.begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
154 vector<string>::const_iterator othEnd = a.otherFields.end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
155 for ( ; othIt != othEnd; ++othIt) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
156 printf("\t%s", othIt->c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
157 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
158 printf("\n");
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
159 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
160 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
161
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
162
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
163 BedLineStatus BedFilePE::parseLine (BEDPE &bedpe, const vector<string> &lineVector, int &lineNum) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
164
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
165 // bail out if we have a blank line
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
166 if (lineVector.empty())
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
167 return BED_BLANK;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
168
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
169 if ((lineVector[0].find("track") == string::npos) && (lineVector[0].find("browser") == string::npos) && (lineVector[0].find("#") == string::npos) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
170 // we need at least 6 columns
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
171 if (lineVector.size() >= 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
172 if (parseBedPELine(bedpe, lineVector, lineNum) == true)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
173 return BED_VALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
174 else return BED_INVALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
175 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
176 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
177 cerr << "It looks as though you have less than 6 columns. Are you sure your files are tab-delimited?" << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
178 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
179 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
180 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
181 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
182 lineNum--;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
183 return BED_HEADER;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
184 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
185
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
186 // default
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
187 return BED_INVALID;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
188 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
189
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
190
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
191 bool BedFilePE::parseBedPELine (BEDPE &bed, const vector<string> &lineVector, const int &lineNum) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
192
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
193 if ((lineNum == 1) && (lineVector.size() >= 6)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
194
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
195 this->bedType = lineVector.size();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
196
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
197 if (this->bedType == 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
198 bed.chrom1 = lineVector[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
199 bed.start1 = atoi(lineVector[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
200 bed.end1 = atoi(lineVector[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
201
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
202 bed.chrom2 = lineVector[3];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
203 bed.start2 = atoi(lineVector[4].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
204 bed.end2 = atoi(lineVector[5].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
205
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
206 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
207 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
208 else if (this->bedType == 7) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
209 bed.chrom1 = lineVector[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
210 bed.start1 = atoi(lineVector[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
211 bed.end1 = atoi(lineVector[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
212
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
213 bed.chrom2 = lineVector[3];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
214 bed.start2 = atoi(lineVector[4].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
215 bed.end2 = atoi(lineVector[5].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
216
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
217 bed.name = lineVector[6];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
218 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
219 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
220 else if (this->bedType == 8) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
221 bed.chrom1 = lineVector[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
222 bed.start1 = atoi(lineVector[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
223 bed.end1 = atoi(lineVector[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
224
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
225 bed.chrom2 = lineVector[3];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
226 bed.start2 = atoi(lineVector[4].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
227 bed.end2 = atoi(lineVector[5].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
228
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
229 bed.name = lineVector[6];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
230 bed.score = lineVector[7].c_str();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
231 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
232 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
233 else if (this->bedType == 10) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
234 bed.chrom1 = lineVector[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
235 bed.start1 = atoi(lineVector[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
236 bed.end1 = atoi(lineVector[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
237
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
238 bed.chrom2 = lineVector[3];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
239 bed.start2 = atoi(lineVector[4].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
240 bed.end2 = atoi(lineVector[5].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
241
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
242 bed.name = lineVector[6];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
243 bed.score = lineVector[7].c_str();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
244
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
245 bed.strand1 = lineVector[8];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
246 bed.strand2 = lineVector[9];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
247
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
248 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
249 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
250 else if (this->bedType > 10) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
251 bed.chrom1 = lineVector[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
252 bed.start1 = atoi(lineVector[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
253 bed.end1 = atoi(lineVector[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
254
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
255 bed.chrom2 = lineVector[3];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
256 bed.start2 = atoi(lineVector[4].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
257 bed.end2 = atoi(lineVector[5].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
258
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
259 bed.name = lineVector[6];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
260 bed.score = lineVector[7].c_str();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
261
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
262 bed.strand1 = lineVector[8];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
263 bed.strand2 = lineVector[9];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
264
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
265 for (unsigned int i = 10; i < lineVector.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
266 bed.otherFields.push_back(lineVector[i]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
267 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
268 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
269 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
270 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
271 cerr << "Unexpected number of fields: " << lineNum << ". Verify that your files are TAB-delimited and that your BEDPE file has 6,7,8 or 10 fields. Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
272 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
273 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
274
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
275 if (bed.start1 > bed.end1) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
276 cerr << "Error: malformed BEDPE entry at line " << lineNum << ". Start1 was greater than End1. Ignoring it and moving on." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
277 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
278 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
279 else if (bed.start2 > bed.end2) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
280 cerr << "Error: malformed BEDPE entry at line " << lineNum << ". Start2 was greater than End2. Ignoring it and moving on." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
281 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
282 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
283 else if ( (bed.start1 < 0) || (bed.end1 < 0) || (bed.start2 < 0) || (bed.end2 < 0) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
284 cerr << "Error: malformed BEDPE entry at line " << lineNum << ". Coordinate <= 0. Ignoring it and moving on." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
285 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
286 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
287 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
288 else if ( (lineNum > 1) && (lineVector.size() == this->bedType)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
289
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
290 if (this->bedType == 6) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
291 bed.chrom1 = lineVector[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
292 bed.start1 = atoi(lineVector[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
293 bed.end1 = atoi(lineVector[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
294
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
295 bed.chrom2 = lineVector[3];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
296 bed.start2 = atoi(lineVector[4].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
297 bed.end2 = atoi(lineVector[5].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
298
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
299 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
300 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
301 else if (this->bedType == 7) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
302 bed.chrom1 = lineVector[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
303 bed.start1 = atoi(lineVector[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
304 bed.end1 = atoi(lineVector[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
305
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
306 bed.chrom2 = lineVector[3];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
307 bed.start2 = atoi(lineVector[4].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
308 bed.end2 = atoi(lineVector[5].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
309
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
310 bed.name = lineVector[6];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
311 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
312 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
313 else if (this->bedType == 8) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
314 bed.chrom1 = lineVector[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
315 bed.start1 = atoi(lineVector[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
316 bed.end1 = atoi(lineVector[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
317
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
318 bed.chrom2 = lineVector[3];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
319 bed.start2 = atoi(lineVector[4].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
320 bed.end2 = atoi(lineVector[5].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
321
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
322 bed.name = lineVector[6];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
323 bed.score = lineVector[7].c_str();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
324 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
325 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
326 else if (this->bedType == 10) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
327 bed.chrom1 = lineVector[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
328 bed.start1 = atoi(lineVector[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
329 bed.end1 = atoi(lineVector[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
330
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
331 bed.chrom2 = lineVector[3];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
332 bed.start2 = atoi(lineVector[4].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
333 bed.end2 = atoi(lineVector[5].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
334
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
335 bed.name = lineVector[6];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
336 bed.score = lineVector[7].c_str();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
337
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
338 bed.strand1 = lineVector[8];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
339 bed.strand2 = lineVector[9];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
340
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
341 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
342 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
343 else if (this->bedType > 10) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
344 bed.chrom1 = lineVector[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
345 bed.start1 = atoi(lineVector[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
346 bed.end1 = atoi(lineVector[2].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
347
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
348 bed.chrom2 = lineVector[3];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
349 bed.start2 = atoi(lineVector[4].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
350 bed.end2 = atoi(lineVector[5].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
351
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
352 bed.name = lineVector[6];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
353 bed.score = lineVector[7].c_str();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
354
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
355 bed.strand1 = lineVector[8];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
356 bed.strand2 = lineVector[9];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
357
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
358 for (unsigned int i = 10; i < lineVector.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
359 bed.otherFields.push_back(lineVector[i]);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
360 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
361 return true;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
362 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
363 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
364 cerr << "Unexpected number of fields: " << lineNum << ". Verify that your files are TAB-delimited and that your BEDPE file has 6,7,8 or 10 fields. Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
365 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
366 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
367
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
368 if (bed.start1 > bed.end1) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
369 cerr << "Error: malformed BED entry at line " << lineNum << ". Start1 was greater than End1. Ignoring it and moving on." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
370 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
371 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
372 else if (bed.start2 > bed.end2) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
373 cerr << "Error: malformed BED entry at line " << lineNum << ". Start2 was greater than End2. Ignoring it and moving on." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
374 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
375 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
376 else if ( (bed.start1 < 0) || (bed.end1 < 0) || (bed.start2 < 0) || (bed.end2 < 0) ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
377 cerr << "Error: malformed BED entry at line " << lineNum << ". Coordinate <= 0. Ignoring it and moving on." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
378 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
379 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
380 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
381 else if (lineVector.size() == 1) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
382 cerr << "Only one BED field detected: " << lineNum << ". Verify that your files are TAB-delimited. Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
383 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
384 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
385 else if ((lineVector.size() != this->bedType) && (lineVector.size() != 0)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
386 cerr << "Differing number of BEDPE fields encountered at line: " << lineNum << ". Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
387 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
388 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
389 else if ((lineVector.size() < 6) && (lineVector.size() != 0)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
390 cerr << "TAB delimited BEDPE file with at least 6 fields (chrom1, start1, end1, chrom2, start2, end2) is required at line: "<< lineNum << ". Exiting..." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
391 exit(1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
392 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
393 return false;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
394 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
395
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
396
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
397 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
398 Adapted from kent source "binKeeperFind"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
399 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
400 void BedFilePE::FindOverlapsPerBin(int bEnd, string chrom, CHRPOS start, CHRPOS end, string name, string strand,
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
401 vector<MATE> &hits, float overlapFraction, bool forceStrand, bool enforceDiffNames) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
402
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
403 int startBin, endBin;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
404 startBin = (start >> _binFirstShift);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
405 endBin = ((end-1) >> _binFirstShift);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
406
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
407 // loop through each bin "level" in the binning hierarchy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
408 for (int i = 0; i < _binLevels; ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
409
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
410 // loop through each bin at this level of the hierarchy
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
411 int offset = _binOffsetsExtended[i];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
412 for (int j = (startBin+offset); j <= (endBin+offset); ++j) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
413
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
414 // loop through each feature in this chrom/bin and see if it overlaps
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
415 // with the feature that was passed in. if so, add the feature to
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
416 // the list of hits.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
417 vector<MATE>::const_iterator bedItr;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
418 vector<MATE>::const_iterator bedEnd;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
419 if (bEnd == 1) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
420 bedItr = bedMapEnd1[chrom][j].begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
421 bedEnd = bedMapEnd1[chrom][j].end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
422 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
423 else if (bEnd == 2) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
424 bedItr = bedMapEnd2[chrom][j].begin();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
425 bedEnd = bedMapEnd2[chrom][j].end();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
426 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
427 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
428 cerr << "Unexpected end of B requested" << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
429 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
430 for (; bedItr != bedEnd; ++bedItr) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
431 float overlap = overlaps(bedItr->bed.start, bedItr->bed.end, start, end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
432 float size = end - start;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
433
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
434 if ( (overlap / size) >= overlapFraction ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
435
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
436 // skip the hit if not on the same strand (and we care)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
437 if ((forceStrand == false) && (enforceDiffNames == false)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
438 hits.push_back(*bedItr); // it's a hit, add it.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
439 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
440 else if ((forceStrand == true) && (enforceDiffNames == false)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
441 if (strand == bedItr->bed.strand)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
442 hits.push_back(*bedItr); // it's a hit, add it.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
443 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
444 else if ((forceStrand == true) && (enforceDiffNames == true)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
445 if ((strand == bedItr->bed.strand) && (name != bedItr->bed.name))
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
446 hits.push_back(*bedItr); // it's a hit, add it.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
447 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
448 else if ((forceStrand == false) && (enforceDiffNames == true)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
449 if (name != bedItr->bed.name)
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
450 hits.push_back(*bedItr); // it's a hit, add it.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
451 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
452 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
453
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
454 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
455 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
456 startBin >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
457 endBin >>= _binNextShift;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
458 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
459 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
460
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
461
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
462 void BedFilePE::loadBedPEFileIntoMap() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
463
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
464 int lineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
465 int bin1, bin2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
466 BedLineStatus bedStatus;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
467 BEDPE bedpeEntry, nullBedPE;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
468
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
469 Open();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
470 bedStatus = this->GetNextBedPE(bedpeEntry, lineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
471 while (bedStatus != BED_INVALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
472
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
473 if (bedStatus == BED_VALID) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
474 MATE *bedEntry1 = new MATE();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
475 MATE *bedEntry2 = new MATE();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
476 // separate the BEDPE entry into separate
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
477 // BED entries
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
478 splitBedPEIntoBeds(bedpeEntry, lineNum, bedEntry1, bedEntry2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
479
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
480 // load end1 into a UCSC bin map
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
481 bin1 = getBin(bedEntry1->bed.start, bedEntry1->bed.end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
482 this->bedMapEnd1[bedEntry1->bed.chrom][bin1].push_back(*bedEntry1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
483
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
484 // load end2 into a UCSC bin map
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
485 bin2 = getBin(bedEntry2->bed.start, bedEntry2->bed.end);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
486 this->bedMapEnd2[bedEntry2->bed.chrom][bin2].push_back(*bedEntry2);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
487
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
488 bedpeEntry = nullBedPE;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
489 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
490 bedStatus = this->GetNextBedPE(bedpeEntry, lineNum);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
491 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
492 Close();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
493 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
494
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
495
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
496 void BedFilePE::splitBedPEIntoBeds(const BEDPE &bedpeEntry, const int &lineNum, MATE *bedEntry1, MATE *bedEntry2) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
497
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
498 /*
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
499 Split the BEDPE entry into separate BED entries
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
500
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
501 NOTE: I am using a trick here where I store
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
502 the lineNum of the BEDPE from the original file
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
503 in the "count" column. This allows me to later
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
504 resolve whether the hits found on both ends of BEDPE A
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
505 came from the same entry in BEDPE B. Tracking by "name"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
506 alone with fail when there are multiple mappings for a given
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
507 read-pair.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
508 */
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
509
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
510 bedEntry1->bed.chrom = bedpeEntry.chrom1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
511 bedEntry1->bed.start = bedpeEntry.start1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
512 bedEntry1->bed.end = bedpeEntry.end1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
513 bedEntry1->bed.name = bedpeEntry.name;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
514 bedEntry1->bed.score = bedpeEntry.score; // only store the score in end1 to save memory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
515 bedEntry1->bed.strand = bedpeEntry.strand1;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
516 bedEntry1->bed.otherFields = bedpeEntry.otherFields; // only store the otherFields in end1 to save memory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
517 bedEntry1->lineNum = lineNum;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
518 bedEntry1->mate = bedEntry2; // keep a pointer to end2
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
519
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
520 bedEntry2->bed.chrom = bedpeEntry.chrom2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
521 bedEntry2->bed.start = bedpeEntry.start2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
522 bedEntry2->bed.end = bedpeEntry.end2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
523 bedEntry2->bed.name = bedpeEntry.name;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
524 bedEntry2->bed.strand = bedpeEntry.strand2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
525 bedEntry2->lineNum = lineNum;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
526 bedEntry2->mate = bedEntry1; // keep a pointer to end1
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
527 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
528
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
529
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
530