Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/overlap/overlap.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfcd8b6c1bda |
---|---|
1 /***************************************************************************** | |
2 overlap.cpp | |
3 | |
4 (c) 2009 - Aaron Quinlan | |
5 Hall Laboratory | |
6 Department of Biochemistry and Molecular Genetics | |
7 University of Virginia | |
8 aaronquinlan@gmail.com | |
9 | |
10 Licenced under the GNU General Public License 2.0 license. | |
11 ******************************************************************************/ | |
12 #include <vector> | |
13 #include <iostream> | |
14 #include <fstream> | |
15 #include <stdlib.h> | |
16 | |
17 #include "version.h" | |
18 #include "lineFileUtilities.h" | |
19 #include "bedFile.h" | |
20 using namespace std; | |
21 | |
22 | |
23 // define our program name | |
24 #define PROGRAM_NAME "overlap" | |
25 | |
26 // define our parameter checking macro | |
27 #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) | |
28 | |
29 | |
30 // function declarations | |
31 void ShowHelp(void); | |
32 void DetermineInput(string &inFile, short &s1Col, short &e1Col, short &s2Col, short &e2Col); | |
33 void ComputeOverlaps(istream &input, short &s1Col, short &e1Col, short &s2Col, short &e2Col); | |
34 | |
35 int main(int argc, char* argv[]) { | |
36 | |
37 // input files | |
38 string inFile = "stdin"; | |
39 string columns; | |
40 | |
41 // our configuration variables | |
42 bool showHelp = false; | |
43 bool haveInFile = true; | |
44 bool haveColumns = false; | |
45 | |
46 | |
47 for(int i = 1; i < argc; i++) { | |
48 int parameterLength = (int)strlen(argv[i]); | |
49 | |
50 if((PARAMETER_CHECK("-h", 2, parameterLength)) || | |
51 (PARAMETER_CHECK("--help", 5, parameterLength))) { | |
52 showHelp = true; | |
53 } | |
54 } | |
55 | |
56 if(showHelp) ShowHelp(); | |
57 | |
58 // do some parsing (all of these parameters require 2 strings) | |
59 for(int i = 1; i < argc; i++) { | |
60 | |
61 int parameterLength = (int)strlen(argv[i]); | |
62 | |
63 if(PARAMETER_CHECK("-i", 2, parameterLength)) { | |
64 if ((i+1) < argc) { | |
65 inFile = argv[i + 1]; | |
66 i++; | |
67 } | |
68 } | |
69 else if(PARAMETER_CHECK("-cols", 5, parameterLength)) { | |
70 haveColumns = true; | |
71 columns = argv[i + 1]; | |
72 i++; | |
73 } | |
74 else { | |
75 cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; | |
76 showHelp = true; | |
77 } | |
78 } | |
79 | |
80 // make sure we have an input files | |
81 if (!haveInFile ) { | |
82 cerr << endl << "*****" << endl << "*****ERROR: Need -i file. " << endl << "*****" << endl; | |
83 showHelp = true; | |
84 } | |
85 | |
86 if (!showHelp) { | |
87 | |
88 // Split the column string sent by the user into discrete column numbers | |
89 // A comma separated string is expected. | |
90 vector<string> posColumns; | |
91 Tokenize(columns, posColumns, ","); | |
92 | |
93 if (posColumns.size() != 4) { | |
94 cerr << endl << "*****" << endl << "*****ERROR: Please specify 4, comma-separated position columns. " << endl << "*****" << endl; | |
95 ShowHelp(); | |
96 } | |
97 else { | |
98 short s1, e1, s2, e2; | |
99 s1 = atoi(posColumns[0].c_str()); | |
100 e1 = atoi(posColumns[1].c_str()); | |
101 s2 = atoi(posColumns[2].c_str()); | |
102 e2 = atoi(posColumns[3].c_str()); | |
103 | |
104 DetermineInput(inFile, s1, e1, s2, e2); | |
105 } | |
106 } | |
107 else { | |
108 ShowHelp(); | |
109 } | |
110 } | |
111 | |
112 void ShowHelp(void) { | |
113 | |
114 cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; | |
115 | |
116 cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; | |
117 | |
118 cerr << "Summary: Computes the amount of overlap (positive values)" << endl; | |
119 cerr << "\t or distance (negative values) between genome features" << endl; | |
120 cerr << "\t and reports the result at the end of the same line." << endl << endl; | |
121 | |
122 cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <input> -cols s1,e1,s2,e2 " << endl << endl; | |
123 | |
124 cerr << "Options: " << endl; | |
125 cerr << "\t-i\t" << "Input file. Use \"stdin\" for pipes." << endl << endl; | |
126 | |
127 cerr << "\t-cols\t" << "Specify the columns (1-based) for the starts and ends of the" << endl; | |
128 cerr << "\t\tfeatures for which you'd like to compute the overlap/distance." << endl; | |
129 cerr << "\t\tThe columns must be listed in the following order: " << endl << endl; | |
130 cerr << "\t\tstart1,end1,start2,end2" << endl << endl; | |
131 | |
132 cerr << "Example: " << endl; | |
133 cerr << "\t$ windowBed -a A.bed -b B.bed -w 10" << endl; | |
134 cerr << "\tchr1 10 20 A chr1 15 25 B" << endl; | |
135 cerr << "\tchr1 10 20 C chr1 25 35 D" << endl << endl; | |
136 cerr << "\t$ windowBed -a A.bed -b B.bed -w 10 | overlap -i stdin -cols 2,3,6,7" << endl; | |
137 cerr << "\tchr1 10 20 A chr1 15 25 B 5" << endl; | |
138 cerr << "\tchr1 10 20 C chr1 25 35 D -5" << endl; | |
139 | |
140 // end the program here | |
141 exit(1); | |
142 | |
143 } | |
144 | |
145 | |
146 void DetermineInput(string &inFile, short &s1Col, short &e1Col, short &s2Col, short &e2Col) { | |
147 | |
148 | |
149 if (inFile != "stdin") { // process a file | |
150 | |
151 ifstream in(inFile.c_str(), ios::in); | |
152 if ( !in ) { | |
153 cerr << "Error: The requested input file (" << inFile << ") could not be opened. Exiting!" << endl; | |
154 exit (1); | |
155 } | |
156 ComputeOverlaps(in, s1Col, e1Col, s2Col, e2Col); | |
157 } | |
158 else ComputeOverlaps(cin, s1Col, e1Col, s2Col, e2Col); | |
159 } | |
160 | |
161 | |
162 void ComputeOverlaps(istream &input, short &s1Col, short &e1Col, short &s2Col, short &e2Col) { | |
163 | |
164 int lineNum = 0; | |
165 string inLine; | |
166 vector<string> inFields; | |
167 | |
168 int overlap; | |
169 | |
170 char *s1End, *e1End, *s2End, *e2End; | |
171 long s1, e1, s2, e2; | |
172 | |
173 while (getline(input, inLine)) { | |
174 lineNum++; | |
175 Tokenize(inLine, inFields); | |
176 | |
177 if (inFields.size() > 1) { | |
178 | |
179 // test if columns 2 and 3 are integers. If so, assume BED. | |
180 s1 = strtol(inFields[s1Col-1].c_str(), &s1End, 10); | |
181 e1 = strtol(inFields[e1Col-1].c_str(), &e1End, 10); | |
182 s2 = strtol(inFields[s2Col-1].c_str(), &s2End, 10); | |
183 e2 = strtol(inFields[e2Col-1].c_str(), &e2End, 10); | |
184 | |
185 // strtol will set pointers to the start of the string if non-integral, base 10 | |
186 // if they all check out, we have valid numeric columns. Otherwise, complain. | |
187 if (s1End != inFields[s1Col-1].c_str() && | |
188 e1End != inFields[e1Col-1].c_str() && | |
189 s2End != inFields[s2Col-1].c_str() && | |
190 e2End != inFields[e2Col-1].c_str()) { | |
191 | |
192 overlap = overlaps(s1, e1, s2, e2); | |
193 printf("%s\t%d\n", inLine.c_str(), overlap); | |
194 } | |
195 else { | |
196 cerr << "One of your columns appears to be non-numeric at line " << lineNum << ". Exiting..." << endl << endl; | |
197 exit(1); | |
198 } | |
199 } | |
200 inFields.clear(); | |
201 } | |
202 } |