Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/overlap/overlap.cpp @ 0:dfcd8b6c1bda
Uploaded
| author | aaronquinlan |
|---|---|
| date | Thu, 03 Nov 2011 10:25:04 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfcd8b6c1bda |
|---|---|
| 1 /***************************************************************************** | |
| 2 overlap.cpp | |
| 3 | |
| 4 (c) 2009 - Aaron Quinlan | |
| 5 Hall Laboratory | |
| 6 Department of Biochemistry and Molecular Genetics | |
| 7 University of Virginia | |
| 8 aaronquinlan@gmail.com | |
| 9 | |
| 10 Licenced under the GNU General Public License 2.0 license. | |
| 11 ******************************************************************************/ | |
| 12 #include <vector> | |
| 13 #include <iostream> | |
| 14 #include <fstream> | |
| 15 #include <stdlib.h> | |
| 16 | |
| 17 #include "version.h" | |
| 18 #include "lineFileUtilities.h" | |
| 19 #include "bedFile.h" | |
| 20 using namespace std; | |
| 21 | |
| 22 | |
| 23 // define our program name | |
| 24 #define PROGRAM_NAME "overlap" | |
| 25 | |
| 26 // define our parameter checking macro | |
| 27 #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) | |
| 28 | |
| 29 | |
| 30 // function declarations | |
| 31 void ShowHelp(void); | |
| 32 void DetermineInput(string &inFile, short &s1Col, short &e1Col, short &s2Col, short &e2Col); | |
| 33 void ComputeOverlaps(istream &input, short &s1Col, short &e1Col, short &s2Col, short &e2Col); | |
| 34 | |
| 35 int main(int argc, char* argv[]) { | |
| 36 | |
| 37 // input files | |
| 38 string inFile = "stdin"; | |
| 39 string columns; | |
| 40 | |
| 41 // our configuration variables | |
| 42 bool showHelp = false; | |
| 43 bool haveInFile = true; | |
| 44 bool haveColumns = false; | |
| 45 | |
| 46 | |
| 47 for(int i = 1; i < argc; i++) { | |
| 48 int parameterLength = (int)strlen(argv[i]); | |
| 49 | |
| 50 if((PARAMETER_CHECK("-h", 2, parameterLength)) || | |
| 51 (PARAMETER_CHECK("--help", 5, parameterLength))) { | |
| 52 showHelp = true; | |
| 53 } | |
| 54 } | |
| 55 | |
| 56 if(showHelp) ShowHelp(); | |
| 57 | |
| 58 // do some parsing (all of these parameters require 2 strings) | |
| 59 for(int i = 1; i < argc; i++) { | |
| 60 | |
| 61 int parameterLength = (int)strlen(argv[i]); | |
| 62 | |
| 63 if(PARAMETER_CHECK("-i", 2, parameterLength)) { | |
| 64 if ((i+1) < argc) { | |
| 65 inFile = argv[i + 1]; | |
| 66 i++; | |
| 67 } | |
| 68 } | |
| 69 else if(PARAMETER_CHECK("-cols", 5, parameterLength)) { | |
| 70 haveColumns = true; | |
| 71 columns = argv[i + 1]; | |
| 72 i++; | |
| 73 } | |
| 74 else { | |
| 75 cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; | |
| 76 showHelp = true; | |
| 77 } | |
| 78 } | |
| 79 | |
| 80 // make sure we have an input files | |
| 81 if (!haveInFile ) { | |
| 82 cerr << endl << "*****" << endl << "*****ERROR: Need -i file. " << endl << "*****" << endl; | |
| 83 showHelp = true; | |
| 84 } | |
| 85 | |
| 86 if (!showHelp) { | |
| 87 | |
| 88 // Split the column string sent by the user into discrete column numbers | |
| 89 // A comma separated string is expected. | |
| 90 vector<string> posColumns; | |
| 91 Tokenize(columns, posColumns, ","); | |
| 92 | |
| 93 if (posColumns.size() != 4) { | |
| 94 cerr << endl << "*****" << endl << "*****ERROR: Please specify 4, comma-separated position columns. " << endl << "*****" << endl; | |
| 95 ShowHelp(); | |
| 96 } | |
| 97 else { | |
| 98 short s1, e1, s2, e2; | |
| 99 s1 = atoi(posColumns[0].c_str()); | |
| 100 e1 = atoi(posColumns[1].c_str()); | |
| 101 s2 = atoi(posColumns[2].c_str()); | |
| 102 e2 = atoi(posColumns[3].c_str()); | |
| 103 | |
| 104 DetermineInput(inFile, s1, e1, s2, e2); | |
| 105 } | |
| 106 } | |
| 107 else { | |
| 108 ShowHelp(); | |
| 109 } | |
| 110 } | |
| 111 | |
| 112 void ShowHelp(void) { | |
| 113 | |
| 114 cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; | |
| 115 | |
| 116 cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; | |
| 117 | |
| 118 cerr << "Summary: Computes the amount of overlap (positive values)" << endl; | |
| 119 cerr << "\t or distance (negative values) between genome features" << endl; | |
| 120 cerr << "\t and reports the result at the end of the same line." << endl << endl; | |
| 121 | |
| 122 cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <input> -cols s1,e1,s2,e2 " << endl << endl; | |
| 123 | |
| 124 cerr << "Options: " << endl; | |
| 125 cerr << "\t-i\t" << "Input file. Use \"stdin\" for pipes." << endl << endl; | |
| 126 | |
| 127 cerr << "\t-cols\t" << "Specify the columns (1-based) for the starts and ends of the" << endl; | |
| 128 cerr << "\t\tfeatures for which you'd like to compute the overlap/distance." << endl; | |
| 129 cerr << "\t\tThe columns must be listed in the following order: " << endl << endl; | |
| 130 cerr << "\t\tstart1,end1,start2,end2" << endl << endl; | |
| 131 | |
| 132 cerr << "Example: " << endl; | |
| 133 cerr << "\t$ windowBed -a A.bed -b B.bed -w 10" << endl; | |
| 134 cerr << "\tchr1 10 20 A chr1 15 25 B" << endl; | |
| 135 cerr << "\tchr1 10 20 C chr1 25 35 D" << endl << endl; | |
| 136 cerr << "\t$ windowBed -a A.bed -b B.bed -w 10 | overlap -i stdin -cols 2,3,6,7" << endl; | |
| 137 cerr << "\tchr1 10 20 A chr1 15 25 B 5" << endl; | |
| 138 cerr << "\tchr1 10 20 C chr1 25 35 D -5" << endl; | |
| 139 | |
| 140 // end the program here | |
| 141 exit(1); | |
| 142 | |
| 143 } | |
| 144 | |
| 145 | |
| 146 void DetermineInput(string &inFile, short &s1Col, short &e1Col, short &s2Col, short &e2Col) { | |
| 147 | |
| 148 | |
| 149 if (inFile != "stdin") { // process a file | |
| 150 | |
| 151 ifstream in(inFile.c_str(), ios::in); | |
| 152 if ( !in ) { | |
| 153 cerr << "Error: The requested input file (" << inFile << ") could not be opened. Exiting!" << endl; | |
| 154 exit (1); | |
| 155 } | |
| 156 ComputeOverlaps(in, s1Col, e1Col, s2Col, e2Col); | |
| 157 } | |
| 158 else ComputeOverlaps(cin, s1Col, e1Col, s2Col, e2Col); | |
| 159 } | |
| 160 | |
| 161 | |
| 162 void ComputeOverlaps(istream &input, short &s1Col, short &e1Col, short &s2Col, short &e2Col) { | |
| 163 | |
| 164 int lineNum = 0; | |
| 165 string inLine; | |
| 166 vector<string> inFields; | |
| 167 | |
| 168 int overlap; | |
| 169 | |
| 170 char *s1End, *e1End, *s2End, *e2End; | |
| 171 long s1, e1, s2, e2; | |
| 172 | |
| 173 while (getline(input, inLine)) { | |
| 174 lineNum++; | |
| 175 Tokenize(inLine, inFields); | |
| 176 | |
| 177 if (inFields.size() > 1) { | |
| 178 | |
| 179 // test if columns 2 and 3 are integers. If so, assume BED. | |
| 180 s1 = strtol(inFields[s1Col-1].c_str(), &s1End, 10); | |
| 181 e1 = strtol(inFields[e1Col-1].c_str(), &e1End, 10); | |
| 182 s2 = strtol(inFields[s2Col-1].c_str(), &s2End, 10); | |
| 183 e2 = strtol(inFields[e2Col-1].c_str(), &e2End, 10); | |
| 184 | |
| 185 // strtol will set pointers to the start of the string if non-integral, base 10 | |
| 186 // if they all check out, we have valid numeric columns. Otherwise, complain. | |
| 187 if (s1End != inFields[s1Col-1].c_str() && | |
| 188 e1End != inFields[e1Col-1].c_str() && | |
| 189 s2End != inFields[s2Col-1].c_str() && | |
| 190 e2End != inFields[e2Col-1].c_str()) { | |
| 191 | |
| 192 overlap = overlaps(s1, e1, s2, e2); | |
| 193 printf("%s\t%d\n", inLine.c_str(), overlap); | |
| 194 } | |
| 195 else { | |
| 196 cerr << "One of your columns appears to be non-numeric at line " << lineNum << ". Exiting..." << endl << endl; | |
| 197 exit(1); | |
| 198 } | |
| 199 } | |
| 200 inFields.clear(); | |
| 201 } | |
| 202 } |
