0
|
1 /*****************************************************************************
|
|
2 coverageMain.cpp
|
|
3
|
|
4 (c) 2009 - Aaron Quinlan
|
|
5 Hall Laboratory
|
|
6 Department of Biochemistry and Molecular Genetics
|
|
7 University of Virginia
|
|
8 aaronquinlan@gmail.com
|
|
9
|
|
10 Licenced under the GNU General Public License 2.0 license.
|
|
11 ******************************************************************************/
|
|
12 #include "coverageBed.h"
|
|
13 #include "version.h"
|
|
14
|
|
15 using namespace std;
|
|
16
|
|
17 // define the version
|
|
18 #define PROGRAM_NAME "coverageBed"
|
|
19
|
|
20 // define our parameter checking macro
|
|
21 #define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
|
|
22
|
|
23 // function declarations
|
|
24 void ShowHelp(void);
|
|
25
|
|
26 int main(int argc, char* argv[]) {
|
|
27
|
|
28 // our configuration variables
|
|
29 bool showHelp = false;
|
|
30
|
|
31 // input files
|
|
32 string bedAFile;
|
|
33 string bedBFile;
|
|
34
|
|
35 // parm flags
|
|
36 bool sameStrand = false;
|
|
37 bool diffStrand = false;
|
|
38 bool writeHistogram = false;
|
|
39 bool eachBase = false;
|
|
40 bool obeySplits = false;
|
|
41 bool bamInput = false;
|
|
42 bool haveBedA = false;
|
|
43 bool haveBedB = false;
|
|
44 bool countsOnly = false;
|
|
45
|
|
46 // check to see if we should print out some help
|
|
47 if(argc <= 1) showHelp = true;
|
|
48
|
|
49 for(int i = 1; i < argc; i++) {
|
|
50 int parameterLength = (int)strlen(argv[i]);
|
|
51
|
|
52 if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
|
|
53 (PARAMETER_CHECK("--help", 5, parameterLength))) {
|
|
54 showHelp = true;
|
|
55 }
|
|
56 }
|
|
57
|
|
58 if(showHelp) ShowHelp();
|
|
59
|
|
60 // do some parsing (all of these parameters require 2 strings)
|
|
61 for(int i = 1; i < argc; i++) {
|
|
62
|
|
63 int parameterLength = (int)strlen(argv[i]);
|
|
64
|
|
65 if(PARAMETER_CHECK("-a", 2, parameterLength)) {
|
|
66 if ((i+1) < argc) {
|
|
67 haveBedA = true;
|
|
68 bedAFile = argv[i + 1];
|
|
69 i++;
|
|
70 }
|
|
71 }
|
|
72 else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {
|
|
73 if ((i+1) < argc) {
|
|
74 haveBedA = true;
|
|
75 bamInput = true;
|
|
76 bedAFile = argv[i + 1];
|
|
77 i++;
|
|
78 }
|
|
79 }
|
|
80 else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
|
|
81 if ((i+1) < argc) {
|
|
82 haveBedB = true;
|
|
83 bedBFile = argv[i + 1];
|
|
84 i++;
|
|
85 }
|
|
86 }
|
|
87 else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
|
|
88 sameStrand = true;
|
|
89 }
|
|
90 else if (PARAMETER_CHECK("-S", 2, parameterLength)) {
|
|
91 diffStrand = true;
|
|
92 }
|
|
93 else if (PARAMETER_CHECK("-hist", 5, parameterLength)) {
|
|
94 writeHistogram = true;
|
|
95 }
|
|
96 else if(PARAMETER_CHECK("-d", 2, parameterLength)) {
|
|
97 eachBase = true;
|
|
98 }
|
|
99 else if (PARAMETER_CHECK("-split", 6, parameterLength)) {
|
|
100 obeySplits = true;
|
|
101 }
|
|
102 else if (PARAMETER_CHECK("-counts", 7, parameterLength)) {
|
|
103 countsOnly = true;
|
|
104 }
|
|
105 else {
|
|
106 cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
|
|
107 showHelp = true;
|
|
108 }
|
|
109 }
|
|
110
|
|
111 // make sure we have both input files
|
|
112 if (!haveBedA || !haveBedB) {
|
|
113 cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;
|
|
114 showHelp = true;
|
|
115 }
|
|
116
|
|
117 if (sameStrand && diffStrand) {
|
|
118 cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl;
|
|
119 showHelp = true;
|
|
120 }
|
|
121
|
|
122 if (!showHelp) {
|
|
123 BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, sameStrand, diffStrand,
|
|
124 writeHistogram, bamInput, obeySplits, eachBase, countsOnly);
|
|
125 delete bg;
|
|
126 return 0;
|
|
127 }
|
|
128 else {
|
|
129 ShowHelp();
|
|
130 }
|
|
131 }
|
|
132
|
|
133 void ShowHelp(void) {
|
|
134
|
|
135 cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
|
|
136
|
|
137 cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
|
|
138
|
|
139 cerr << "Summary: Returns the depth and breadth of coverage of features from A" << endl;
|
|
140 cerr << "\t on the intervals in B." << endl << endl;
|
|
141
|
|
142 cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;
|
|
143
|
|
144 cerr << "Options: " << endl;
|
|
145
|
|
146 cerr << "\t-abam\t" << "The A input file is in BAM format." << endl << endl;
|
|
147
|
|
148 cerr << "\t-s\t" << "Require same strandedness. That is, only counts hits in A that" << endl;
|
|
149 cerr << "\t\toverlap B on the _same_ strand." << endl;
|
|
150 cerr << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl;
|
|
151
|
|
152 cerr << "\t-S\t" << "Require different strandedness. That is, only report hits in A that" << endl;
|
|
153 cerr << "\t\toverlap B on the _opposite_ strand." << endl;
|
|
154 cerr << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl;
|
|
155
|
|
156 cerr << "\t-hist\t" << "Report a histogram of coverage for each feature in B" << endl;
|
|
157 cerr << "\t\tas well as a summary histogram for _all_ features in B." << endl << endl;
|
|
158 cerr << "\t\tOutput (tab delimited) after each feature in B:" << endl;
|
|
159 cerr << "\t\t 1) depth\n\t\t 2) # bases at depth\n\t\t 3) size of B\n\t\t 4) % of B at depth" << endl << endl;
|
|
160
|
|
161 cerr << "\t-d\t" << "Report the depth at each position in each B feature." << endl;
|
|
162 cerr << "\t\tPositions reported are one based. Each position" << endl;
|
|
163 cerr << "\t\tand depth follow the complete B feature." << endl << endl;
|
|
164
|
|
165 cerr << "\t-counts\t" << "Only report the count of overlaps, don't compute fraction, etc." << endl << endl;
|
|
166
|
|
167 cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl;
|
|
168 cerr << "\t\twhen computing coverage." << endl;
|
|
169 cerr << "\t\tFor BAM files, this uses the CIGAR \"N\" and \"D\" operations " << endl;
|
|
170 cerr << "\t\tto infer the blocks for computing coverage." << endl;
|
|
171 cerr << "\t\tFor BED12 files, this uses the BlockCount, BlockStarts," << endl;
|
|
172 cerr << "\t\tand BlockEnds fields (i.e., columns 10,11,12)." << endl << endl;
|
|
173
|
|
174 cerr << "Default Output: " << endl;
|
|
175 cerr << "\t" << " After each entry in B, reports: " << endl;
|
|
176 cerr << "\t 1) The number of features in A that overlapped the B interval." << endl;
|
|
177 cerr << "\t 2) The number of bases in B that had non-zero coverage." << endl;
|
|
178 cerr << "\t 3) The length of the entry in B." << endl;
|
|
179 cerr << "\t 4) The fraction of bases in B that had non-zero coverage." << endl << endl;
|
|
180
|
|
181 exit(1);
|
|
182 }
|