| 0 | 1 /***************************************************************************** | 
|  | 2   pairToBedMain.cpp | 
|  | 3 | 
|  | 4   (c) 2009 - Aaron Quinlan | 
|  | 5   Hall Laboratory | 
|  | 6   Department of Biochemistry and Molecular Genetics | 
|  | 7   University of Virginia | 
|  | 8   aaronquinlan@gmail.com | 
|  | 9 | 
|  | 10   Licenced under the GNU General Public License 2.0 license. | 
|  | 11 ******************************************************************************/ | 
|  | 12 #include "pairToBed.h" | 
|  | 13 #include "version.h" | 
|  | 14 | 
|  | 15 using namespace std; | 
|  | 16 | 
|  | 17 // define our program name | 
|  | 18 #define PROGRAM_NAME "pairToBed" | 
|  | 19 | 
|  | 20 // define our parameter checking macro | 
|  | 21 #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen) | 
|  | 22 | 
|  | 23 // function declarations | 
|  | 24 void ShowHelp(void); | 
|  | 25 | 
|  | 26 int main(int argc, char* argv[]) { | 
|  | 27 | 
|  | 28     // our configuration variables | 
|  | 29     bool showHelp = false; | 
|  | 30 | 
|  | 31     // input files | 
|  | 32     string bedAFile; | 
|  | 33     string bedBFile; | 
|  | 34 | 
|  | 35     // input arguments | 
|  | 36     float overlapFraction = 1E-9; | 
|  | 37     string searchType = "either"; | 
|  | 38 | 
|  | 39     // flags to track parameters | 
|  | 40     bool haveBedA           = false; | 
|  | 41     bool haveBedB           = false; | 
|  | 42     bool haveSearchType     = false; | 
|  | 43     bool haveFraction       = false; | 
|  | 44     bool sameStrand         = false; | 
|  | 45     bool diffStrand         = false; | 
|  | 46     bool useEditDistance    = false; | 
|  | 47     bool inputIsBam         = false; | 
|  | 48     bool outputIsBam        = true; | 
|  | 49     bool uncompressedBam    = false; | 
|  | 50 | 
|  | 51     // check to see if we should print out some help | 
|  | 52     if(argc <= 1) showHelp = true; | 
|  | 53 | 
|  | 54     for(int i = 1; i < argc; i++) { | 
|  | 55         int parameterLength = (int)strlen(argv[i]); | 
|  | 56 | 
|  | 57         if((PARAMETER_CHECK("-h", 2, parameterLength)) || | 
|  | 58         (PARAMETER_CHECK("--help", 5, parameterLength))) { | 
|  | 59             showHelp = true; | 
|  | 60         } | 
|  | 61     } | 
|  | 62 | 
|  | 63     if(showHelp) ShowHelp(); | 
|  | 64 | 
|  | 65     // do some parsing (all of these parameters require 2 strings) | 
|  | 66     for(int i = 1; i < argc; i++) { | 
|  | 67 | 
|  | 68         int parameterLength = (int)strlen(argv[i]); | 
|  | 69 | 
|  | 70         if(PARAMETER_CHECK("-a", 2, parameterLength)) { | 
|  | 71             if ((i+1) < argc) { | 
|  | 72                 haveBedA = true; | 
|  | 73                 outputIsBam  = false; | 
|  | 74                 bedAFile = argv[i + 1]; | 
|  | 75                 i++; | 
|  | 76             } | 
|  | 77         } | 
|  | 78         else if(PARAMETER_CHECK("-abam", 5, parameterLength)) { | 
|  | 79             if ((i+1) < argc) { | 
|  | 80                 haveBedA = true; | 
|  | 81                 inputIsBam = true; | 
|  | 82                 bedAFile = argv[i + 1]; | 
|  | 83                 i++; | 
|  | 84             } | 
|  | 85         } | 
|  | 86         else if(PARAMETER_CHECK("-b", 2, parameterLength)) { | 
|  | 87             if ((i+1) < argc) { | 
|  | 88                 haveBedB = true; | 
|  | 89                 bedBFile = argv[i + 1]; | 
|  | 90                 i++; | 
|  | 91             } | 
|  | 92         } | 
|  | 93         else if(PARAMETER_CHECK("-bedpe", 6, parameterLength)) { | 
|  | 94             outputIsBam = false; | 
|  | 95         } | 
|  | 96         else if(PARAMETER_CHECK("-ed", 3, parameterLength)) { | 
|  | 97             useEditDistance = true; | 
|  | 98         } | 
|  | 99         else if(PARAMETER_CHECK("-type", 5, parameterLength)) { | 
|  | 100             if ((i+1) < argc) { | 
|  | 101                 haveSearchType = true; | 
|  | 102                 searchType = argv[i + 1]; | 
|  | 103                 i++; | 
|  | 104             } | 
|  | 105         } | 
|  | 106         else if(PARAMETER_CHECK("-f", 2, parameterLength)) { | 
|  | 107             if ((i+1) < argc) { | 
|  | 108                 haveFraction = true; | 
|  | 109                 overlapFraction = atof(argv[i + 1]); | 
|  | 110                 i++; | 
|  | 111             } | 
|  | 112         } | 
|  | 113         else if (PARAMETER_CHECK("-s", 2, parameterLength)) { | 
|  | 114             sameStrand = true; | 
|  | 115         } | 
|  | 116         else if (PARAMETER_CHECK("-S", 2, parameterLength)) { | 
|  | 117             diffStrand = true; | 
|  | 118         } | 
|  | 119         else if(PARAMETER_CHECK("-ubam", 5, parameterLength)) { | 
|  | 120             uncompressedBam = true; | 
|  | 121         } | 
|  | 122         else { | 
|  | 123             cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl; | 
|  | 124             showHelp = true; | 
|  | 125         } | 
|  | 126     } | 
|  | 127 | 
|  | 128 | 
|  | 129     // make sure we have both input files | 
|  | 130     if (!haveBedA || !haveBedB) { | 
|  | 131         cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl; | 
|  | 132         showHelp = true; | 
|  | 133     } | 
|  | 134 | 
|  | 135     if (haveSearchType && (searchType != "either") && (searchType != "neither") && (searchType != "both") | 
|  | 136                         && (searchType != "xor") && (searchType != "notboth") && (searchType != "ispan") | 
|  | 137                         && (searchType != "ospan") && (searchType != "notispan") && (searchType != "notospan")) { | 
|  | 138         cerr << endl << "*****" << endl << "*****ERROR: Request \"either\" or \"both\" or \"neither\" or \"xor\" or \"notboth\" or \"ispan\" or \"ospan\" or \"notispan\" or \"notospan\"" << endl << "*****" << endl; | 
|  | 139         showHelp = true; | 
|  | 140     } | 
|  | 141 | 
|  | 142     if ( ((searchType == "ispan") || (searchType == "ospan") || (searchType == "notispan") || (searchType == "notospan")) | 
|  | 143          && (sameStrand || diffStrand) ) { | 
|  | 144         cerr << endl << "*****" << endl << "*****ERROR: Cannot enforce strandedness with selected searchtype" << endl << "*****" << endl; | 
|  | 145         showHelp = true; | 
|  | 146     } | 
|  | 147 | 
|  | 148     if (useEditDistance && (inputIsBam == false || outputIsBam == true)) { | 
|  | 149         cerr << endl << "*****" << endl << "*****ERROR: -ed must be used with -bedpe and -abam." << endl << "*****" << endl; | 
|  | 150         showHelp = true; | 
|  | 151     } | 
|  | 152 | 
|  | 153     if (sameStrand && diffStrand) { | 
|  | 154         cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl; | 
|  | 155         showHelp = true; | 
|  | 156     } | 
|  | 157     if (!showHelp) { | 
|  | 158 | 
|  | 159         BedIntersectPE *bi = new BedIntersectPE(bedAFile, bedBFile, overlapFraction, | 
|  | 160                                                 searchType, sameStrand, diffStrand, inputIsBam, | 
|  | 161                                                 outputIsBam, uncompressedBam, useEditDistance); | 
|  | 162         delete bi; | 
|  | 163         return 0; | 
|  | 164     } | 
|  | 165     else { | 
|  | 166         ShowHelp(); | 
|  | 167     } | 
|  | 168 } | 
|  | 169 | 
|  | 170 | 
|  | 171 void ShowHelp(void) { | 
|  | 172 | 
|  | 173     cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl; | 
|  | 174 | 
|  | 175     cerr << "Author:  Aaron Quinlan (aaronquinlan@gmail.com)" << endl; | 
|  | 176 | 
|  | 177     cerr << "Summary: Report overlaps between a BEDPE file and a BED/GFF/VCF file." << endl << endl; | 
|  | 178 | 
|  | 179     cerr << "Usage:   " << PROGRAM_NAME << " [OPTIONS] -a <bedpe> -b <bed/gff/vcf>" << endl << endl; | 
|  | 180 | 
|  | 181     cerr << "Options: " << endl; | 
|  | 182 | 
|  | 183     cerr << "\t-abam\t"         << "The A input file is in BAM format.  Output will be BAM as well." << endl; | 
|  | 184     cerr                        << "\t\t- Requires BAM to be grouped or sorted by query." << endl << endl; | 
|  | 185 | 
|  | 186     cerr << "\t-ubam\t"         << "Write uncompressed BAM output. Default is to write compressed BAM." << endl << endl; | 
|  | 187     cerr                        << "\t\tis to write output in BAM when using -abam." << endl << endl; | 
|  | 188 | 
|  | 189     cerr << "\t-bedpe\t"        << "When using BAM input (-abam), write output as BEDPE. The default" << endl; | 
|  | 190     cerr                        << "\t\tis to write output in BAM when using -abam." << endl << endl; | 
|  | 191 | 
|  | 192     cerr << "\t-ed\t"           << "Use BAM total edit distance (NM tag) for BEDPE score." << endl; | 
|  | 193     cerr                        << "\t\t- Default for BEDPE is to use the minimum of" << endl; | 
|  | 194     cerr                        << "\t\t  of the two mapping qualities for the pair." << endl; | 
|  | 195     cerr                        << "\t\t- When -ed is used the total edit distance" << endl; | 
|  | 196     cerr                        << "\t\t  from the two mates is reported as the score." << endl << endl; | 
|  | 197 | 
|  | 198     cerr << "\t-f\t"                    << "Minimum overlap required as fraction of A (e.g. 0.05)." << endl; | 
|  | 199     cerr                                << "\t\tDefault is 1E-9 (effectively 1bp)." << endl << endl; | 
|  | 200 | 
|  | 201     cerr << "\t-s\t"                    << "Require same strandedness when finding overlaps." << endl; | 
|  | 202     cerr                                << "\t\tDefault is to ignore stand." << endl; | 
|  | 203     cerr                                << "\t\tNot applicable with -type inspan or -type outspan." << endl << endl; | 
|  | 204 | 
|  | 205     cerr << "\t-S\t"                    << "Require different strandedness when finding overlaps." << endl; | 
|  | 206     cerr                                << "\t\tDefault is to ignore stand." << endl; | 
|  | 207     cerr                                << "\t\tNot applicable with -type inspan or -type outspan." << endl << endl; | 
|  | 208 | 
|  | 209     cerr << "\t-type \t"                << "Approach to reporting overlaps between BEDPE and BED." << endl << endl; | 
|  | 210     cerr                                << "\t\teither\tReport overlaps if either end of A overlaps B." << endl; | 
|  | 211     cerr                                    << "\t\t\t- Default." << endl; | 
|  | 212 | 
|  | 213     cerr                                << "\t\tneither\tReport A if neither end of A overlaps B." << endl; | 
|  | 214 | 
|  | 215     cerr                                << "\t\tboth\tReport overlaps if both ends of A overlap  B." << endl; | 
|  | 216 | 
|  | 217     cerr                                << "\t\txor\tReport overlaps if one and only one end of A overlaps B." << endl; | 
|  | 218 | 
|  | 219     cerr                                << "\t\tnotboth\tReport overlaps if neither end or one and only one " << endl; | 
|  | 220     cerr                                    << "\t\t\tend of A overlap B.  That is, xor + neither." << endl << endl; | 
|  | 221 | 
|  | 222     cerr                                << "\t\tispan\tReport overlaps between [end1, start2] of A and B." << endl; | 
|  | 223     cerr                                    << "\t\t\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl; | 
|  | 224 | 
|  | 225     cerr                                << "\t\tospan\tReport overlaps between [start1, end2] of A and B." << endl; | 
|  | 226     cerr                                    << "\t\t\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl; | 
|  | 227 | 
|  | 228     cerr                                << "\t\tnotispan\tReport A if ispan of A doesn't overlap B." << endl; | 
|  | 229     cerr                                    << "\t\t\t\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl; | 
|  | 230 | 
|  | 231     cerr                                << "\t\tnotospan\tReport A if ospan of A doesn't overlap B." << endl; | 
|  | 232     cerr                                    << "\t\t\t\t- Note: If chrom1 <> chrom2, entry is ignored." << endl << endl; | 
|  | 233 | 
|  | 234     cerr << "Refer to the BEDTools manual for BEDPE format." << endl << endl; | 
|  | 235 | 
|  | 236     exit(1); | 
|  | 237 } |