comparison BEDTools-Version-2.14.3/src/overlap/overlap.cpp @ 1:bec36315bd12 default tip

Deleted selected files
author aaronquinlan
date Sat, 19 Nov 2011 14:17:03 -0500
parents dfcd8b6c1bda
children
comparison
equal deleted inserted replaced
0:dfcd8b6c1bda 1:bec36315bd12
1 /*****************************************************************************
2 overlap.cpp
3
4 (c) 2009 - Aaron Quinlan
5 Hall Laboratory
6 Department of Biochemistry and Molecular Genetics
7 University of Virginia
8 aaronquinlan@gmail.com
9
10 Licenced under the GNU General Public License 2.0 license.
11 ******************************************************************************/
12 #include <vector>
13 #include <iostream>
14 #include <fstream>
15 #include <stdlib.h>
16
17 #include "version.h"
18 #include "lineFileUtilities.h"
19 #include "bedFile.h"
20 using namespace std;
21
22
23 // define our program name
24 #define PROGRAM_NAME "overlap"
25
26 // define our parameter checking macro
27 #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
28
29
30 // function declarations
31 void ShowHelp(void);
32 void DetermineInput(string &inFile, short &s1Col, short &e1Col, short &s2Col, short &e2Col);
33 void ComputeOverlaps(istream &input, short &s1Col, short &e1Col, short &s2Col, short &e2Col);
34
35 int main(int argc, char* argv[]) {
36
37 // input files
38 string inFile = "stdin";
39 string columns;
40
41 // our configuration variables
42 bool showHelp = false;
43 bool haveInFile = true;
44 bool haveColumns = false;
45
46
47 for(int i = 1; i < argc; i++) {
48 int parameterLength = (int)strlen(argv[i]);
49
50 if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
51 (PARAMETER_CHECK("--help", 5, parameterLength))) {
52 showHelp = true;
53 }
54 }
55
56 if(showHelp) ShowHelp();
57
58 // do some parsing (all of these parameters require 2 strings)
59 for(int i = 1; i < argc; i++) {
60
61 int parameterLength = (int)strlen(argv[i]);
62
63 if(PARAMETER_CHECK("-i", 2, parameterLength)) {
64 if ((i+1) < argc) {
65 inFile = argv[i + 1];
66 i++;
67 }
68 }
69 else if(PARAMETER_CHECK("-cols", 5, parameterLength)) {
70 haveColumns = true;
71 columns = argv[i + 1];
72 i++;
73 }
74 else {
75 cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
76 showHelp = true;
77 }
78 }
79
80 // make sure we have an input files
81 if (!haveInFile ) {
82 cerr << endl << "*****" << endl << "*****ERROR: Need -i file. " << endl << "*****" << endl;
83 showHelp = true;
84 }
85
86 if (!showHelp) {
87
88 // Split the column string sent by the user into discrete column numbers
89 // A comma separated string is expected.
90 vector<string> posColumns;
91 Tokenize(columns, posColumns, ",");
92
93 if (posColumns.size() != 4) {
94 cerr << endl << "*****" << endl << "*****ERROR: Please specify 4, comma-separated position columns. " << endl << "*****" << endl;
95 ShowHelp();
96 }
97 else {
98 short s1, e1, s2, e2;
99 s1 = atoi(posColumns[0].c_str());
100 e1 = atoi(posColumns[1].c_str());
101 s2 = atoi(posColumns[2].c_str());
102 e2 = atoi(posColumns[3].c_str());
103
104 DetermineInput(inFile, s1, e1, s2, e2);
105 }
106 }
107 else {
108 ShowHelp();
109 }
110 }
111
112 void ShowHelp(void) {
113
114 cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
115
116 cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
117
118 cerr << "Summary: Computes the amount of overlap (positive values)" << endl;
119 cerr << "\t or distance (negative values) between genome features" << endl;
120 cerr << "\t and reports the result at the end of the same line." << endl << endl;
121
122 cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <input> -cols s1,e1,s2,e2 " << endl << endl;
123
124 cerr << "Options: " << endl;
125 cerr << "\t-i\t" << "Input file. Use \"stdin\" for pipes." << endl << endl;
126
127 cerr << "\t-cols\t" << "Specify the columns (1-based) for the starts and ends of the" << endl;
128 cerr << "\t\tfeatures for which you'd like to compute the overlap/distance." << endl;
129 cerr << "\t\tThe columns must be listed in the following order: " << endl << endl;
130 cerr << "\t\tstart1,end1,start2,end2" << endl << endl;
131
132 cerr << "Example: " << endl;
133 cerr << "\t$ windowBed -a A.bed -b B.bed -w 10" << endl;
134 cerr << "\tchr1 10 20 A chr1 15 25 B" << endl;
135 cerr << "\tchr1 10 20 C chr1 25 35 D" << endl << endl;
136 cerr << "\t$ windowBed -a A.bed -b B.bed -w 10 | overlap -i stdin -cols 2,3,6,7" << endl;
137 cerr << "\tchr1 10 20 A chr1 15 25 B 5" << endl;
138 cerr << "\tchr1 10 20 C chr1 25 35 D -5" << endl;
139
140 // end the program here
141 exit(1);
142
143 }
144
145
146 void DetermineInput(string &inFile, short &s1Col, short &e1Col, short &s2Col, short &e2Col) {
147
148
149 if (inFile != "stdin") { // process a file
150
151 ifstream in(inFile.c_str(), ios::in);
152 if ( !in ) {
153 cerr << "Error: The requested input file (" << inFile << ") could not be opened. Exiting!" << endl;
154 exit (1);
155 }
156 ComputeOverlaps(in, s1Col, e1Col, s2Col, e2Col);
157 }
158 else ComputeOverlaps(cin, s1Col, e1Col, s2Col, e2Col);
159 }
160
161
162 void ComputeOverlaps(istream &input, short &s1Col, short &e1Col, short &s2Col, short &e2Col) {
163
164 int lineNum = 0;
165 string inLine;
166 vector<string> inFields;
167
168 int overlap;
169
170 char *s1End, *e1End, *s2End, *e2End;
171 long s1, e1, s2, e2;
172
173 while (getline(input, inLine)) {
174 lineNum++;
175 Tokenize(inLine, inFields);
176
177 if (inFields.size() > 1) {
178
179 // test if columns 2 and 3 are integers. If so, assume BED.
180 s1 = strtol(inFields[s1Col-1].c_str(), &s1End, 10);
181 e1 = strtol(inFields[e1Col-1].c_str(), &e1End, 10);
182 s2 = strtol(inFields[s2Col-1].c_str(), &s2End, 10);
183 e2 = strtol(inFields[e2Col-1].c_str(), &e2End, 10);
184
185 // strtol will set pointers to the start of the string if non-integral, base 10
186 // if they all check out, we have valid numeric columns. Otherwise, complain.
187 if (s1End != inFields[s1Col-1].c_str() &&
188 e1End != inFields[e1Col-1].c_str() &&
189 s2End != inFields[s2Col-1].c_str() &&
190 e2End != inFields[e2Col-1].c_str()) {
191
192 overlap = overlaps(s1, e1, s2, e2);
193 printf("%s\t%d\n", inLine.c_str(), overlap);
194 }
195 else {
196 cerr << "One of your columns appears to be non-numeric at line " << lineNum << ". Exiting..." << endl << endl;
197 exit(1);
198 }
199 }
200 inFields.clear();
201 }
202 }