Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/overlap/overlap.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/overlap/overlap.cpp Thu Nov 03 10:25:04 2011 -0400 @@ -0,0 +1,202 @@ +/***************************************************************************** + overlap.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include <vector> +#include <iostream> +#include <fstream> +#include <stdlib.h> + +#include "version.h" +#include "lineFileUtilities.h" +#include "bedFile.h" +using namespace std; + + +// define our program name +#define PROGRAM_NAME "overlap" + +// define our parameter checking macro +#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) + + +// function declarations +void ShowHelp(void); +void DetermineInput(string &inFile, short &s1Col, short &e1Col, short &s2Col, short &e2Col); +void ComputeOverlaps(istream &input, short &s1Col, short &e1Col, short &s2Col, short &e2Col); + +int main(int argc, char* argv[]) { + + // input files + string inFile = "stdin"; + string columns; + + // our configuration variables + bool showHelp = false; + bool haveInFile = true; + bool haveColumns = false; + + + for(int i = 1; i < argc; i++) { + int parameterLength = (int)strlen(argv[i]); + + if((PARAMETER_CHECK("-h", 2, parameterLength)) || + (PARAMETER_CHECK("--help", 5, parameterLength))) { + showHelp = true; + } + } + + if(showHelp) ShowHelp(); + + // do some parsing (all of these parameters require 2 strings) + for(int i = 1; i < argc; i++) { + + int parameterLength = (int)strlen(argv[i]); + + if(PARAMETER_CHECK("-i", 2, parameterLength)) { + if ((i+1) < argc) { + inFile = argv[i + 1]; + i++; + } + } + else if(PARAMETER_CHECK("-cols", 5, parameterLength)) { + haveColumns = true; + columns = argv[i + 1]; + i++; + } + else { + cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; + showHelp = true; + } + } + + // make sure we have an input files + if (!haveInFile ) { + cerr << endl << "*****" << endl << "*****ERROR: Need -i file. " << endl << "*****" << endl; + showHelp = true; + } + + if (!showHelp) { + + // Split the column string sent by the user into discrete column numbers + // A comma separated string is expected. + vector<string> posColumns; + Tokenize(columns, posColumns, ","); + + if (posColumns.size() != 4) { + cerr << endl << "*****" << endl << "*****ERROR: Please specify 4, comma-separated position columns. " << endl << "*****" << endl; + ShowHelp(); + } + else { + short s1, e1, s2, e2; + s1 = atoi(posColumns[0].c_str()); + e1 = atoi(posColumns[1].c_str()); + s2 = atoi(posColumns[2].c_str()); + e2 = atoi(posColumns[3].c_str()); + + DetermineInput(inFile, s1, e1, s2, e2); + } + } + else { + ShowHelp(); + } +} + +void ShowHelp(void) { + + cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; + + cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl; + + cerr << "Summary: Computes the amount of overlap (positive values)" << endl; + cerr << "\t or distance (negative values) between genome features" << endl; + cerr << "\t and reports the result at the end of the same line." << endl << endl; + + cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <input> -cols s1,e1,s2,e2 " << endl << endl; + + cerr << "Options: " << endl; + cerr << "\t-i\t" << "Input file. Use \"stdin\" for pipes." << endl << endl; + + cerr << "\t-cols\t" << "Specify the columns (1-based) for the starts and ends of the" << endl; + cerr << "\t\tfeatures for which you'd like to compute the overlap/distance." << endl; + cerr << "\t\tThe columns must be listed in the following order: " << endl << endl; + cerr << "\t\tstart1,end1,start2,end2" << endl << endl; + + cerr << "Example: " << endl; + cerr << "\t$ windowBed -a A.bed -b B.bed -w 10" << endl; + cerr << "\tchr1 10 20 A chr1 15 25 B" << endl; + cerr << "\tchr1 10 20 C chr1 25 35 D" << endl << endl; + cerr << "\t$ windowBed -a A.bed -b B.bed -w 10 | overlap -i stdin -cols 2,3,6,7" << endl; + cerr << "\tchr1 10 20 A chr1 15 25 B 5" << endl; + cerr << "\tchr1 10 20 C chr1 25 35 D -5" << endl; + + // end the program here + exit(1); + +} + + +void DetermineInput(string &inFile, short &s1Col, short &e1Col, short &s2Col, short &e2Col) { + + + if (inFile != "stdin") { // process a file + + ifstream in(inFile.c_str(), ios::in); + if ( !in ) { + cerr << "Error: The requested input file (" << inFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + ComputeOverlaps(in, s1Col, e1Col, s2Col, e2Col); + } + else ComputeOverlaps(cin, s1Col, e1Col, s2Col, e2Col); +} + + +void ComputeOverlaps(istream &input, short &s1Col, short &e1Col, short &s2Col, short &e2Col) { + + int lineNum = 0; + string inLine; + vector<string> inFields; + + int overlap; + + char *s1End, *e1End, *s2End, *e2End; + long s1, e1, s2, e2; + + while (getline(input, inLine)) { + lineNum++; + Tokenize(inLine, inFields); + + if (inFields.size() > 1) { + + // test if columns 2 and 3 are integers. If so, assume BED. + s1 = strtol(inFields[s1Col-1].c_str(), &s1End, 10); + e1 = strtol(inFields[e1Col-1].c_str(), &e1End, 10); + s2 = strtol(inFields[s2Col-1].c_str(), &s2End, 10); + e2 = strtol(inFields[e2Col-1].c_str(), &e2End, 10); + + // strtol will set pointers to the start of the string if non-integral, base 10 + // if they all check out, we have valid numeric columns. Otherwise, complain. + if (s1End != inFields[s1Col-1].c_str() && + e1End != inFields[e1Col-1].c_str() && + s2End != inFields[s2Col-1].c_str() && + e2End != inFields[e2Col-1].c_str()) { + + overlap = overlaps(s1, e1, s2, e2); + printf("%s\t%d\n", inLine.c_str(), overlap); + } + else { + cerr << "One of your columns appears to be non-numeric at line " << lineNum << ". Exiting..." << endl << endl; + exit(1); + } + } + inFields.clear(); + } +}