0
|
1 /*****************************************************************************
|
|
2 bedToIgv.cpp
|
|
3
|
|
4 (c) 2009 - Aaron Quinlan
|
|
5 Hall Laboratory
|
|
6 Department of Biochemistry and Molecular Genetics
|
|
7 University of Virginia
|
|
8 aaronquinlan@gmail.com
|
|
9
|
|
10 Licenced under the GNU General Public License 2.0 license.
|
|
11 ******************************************************************************/
|
|
12 #include "lineFileUtilities.h"
|
|
13 #include "bedFile.h"
|
|
14 #include "genomeFile.h"
|
|
15 #include "version.h"
|
|
16
|
|
17 #include <vector>
|
|
18 #include <iostream>
|
|
19 #include <fstream>
|
|
20 #include <stdlib.h>
|
|
21
|
|
22 using namespace std;
|
|
23
|
|
// define our program name
#define PROGRAM_NAME "bedToIgv"

// define our parameter checking macro
//
// Evaluates to true when the current argument, argv[i], is exactly `param`:
//   param     - the option literal to test for (e.g. "-i")
//   paramLen  - the length of `param`; must equal the literal's real length
//   actualLen - the length of argv[i]
//
// The expansion reads `argv` and `i` from the scope where the macro is used
// and calls an unqualified min(). The arguments and the whole expansion are
// parenthesized so the macro behaves as a single operand wherever it is
// placed (for example under `!` or next to other operators).
#define PARAMETER_CHECK(param, paramLen, actualLen) \
    ((strncmp(argv[i], (param), min((actualLen), (paramLen))) == 0) && ((actualLen) == (paramLen)))
|
|
29
|
|
30 // function declarations
|
|
31 void ShowHelp(void);
|
|
32
|
|
33 void DetermineBedInput(BedFile *bed, string path, string sortType, string session,
|
|
34 bool collapse, bool useNames, string imageType, int slop);
|
|
35 void ProcessBed(istream &bedInput, BedFile *bed, string path, string sortType, string session,
|
|
36 bool collapse, bool useNames, string imageType, int slop);
|
|
37
|
|
38
|
|
39 int main(int argc, char* argv[]) {
|
|
40
|
|
41 // our configuration variables
|
|
42 bool showHelp = false;
|
|
43
|
|
44 // input files
|
|
45 string bedFile = "stdin";
|
|
46 string imagePath = "./";
|
|
47 string sortType = "none";
|
|
48 string session = "none";
|
|
49 int slop = 0;
|
|
50 string imageType = "png";
|
|
51
|
|
52 bool haveBed = true;
|
|
53 bool collapse = false;
|
|
54 bool useNames = false;
|
|
55
|
|
56 for(int i = 1; i < argc; i++) {
|
|
57 int parameterLength = (int)strlen(argv[i]);
|
|
58
|
|
59 if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
|
|
60 (PARAMETER_CHECK("--help", 5, parameterLength))) {
|
|
61 showHelp = true;
|
|
62 }
|
|
63 }
|
|
64
|
|
65 if(showHelp) ShowHelp();
|
|
66
|
|
67 // do some parsing (all of these parameters require 2 strings)
|
|
68 for(int i = 1; i < argc; i++) {
|
|
69
|
|
70 int parameterLength = (int)strlen(argv[i]);
|
|
71
|
|
72 if(PARAMETER_CHECK("-i", 2, parameterLength)) {
|
|
73 if ((i+1) < argc) {
|
|
74 bedFile = argv[i + 1];
|
|
75 i++;
|
|
76 }
|
|
77 }
|
|
78 else if(PARAMETER_CHECK("-path", 5, parameterLength)) {
|
|
79 if ((i+1) < argc) {
|
|
80 imagePath = argv[i + 1];
|
|
81 i++;
|
|
82 }
|
|
83 }
|
|
84 else if(PARAMETER_CHECK("-sort", 5, parameterLength)) {
|
|
85 if ((i+1) < argc) {
|
|
86 sortType = argv[i + 1];
|
|
87 i++;
|
|
88 }
|
|
89 }
|
|
90 else if(PARAMETER_CHECK("-sess", 5, parameterLength)) {
|
|
91 if ((i+1) < argc) {
|
|
92 session = argv[i + 1];
|
|
93 i++;
|
|
94 }
|
|
95 }
|
|
96 else if(PARAMETER_CHECK("-clps", 5, parameterLength)) {
|
|
97 collapse = true;
|
|
98 }
|
|
99 else if(PARAMETER_CHECK("-name", 5, parameterLength)) {
|
|
100 useNames = true;
|
|
101 }
|
|
102 else if(PARAMETER_CHECK("-slop", 5, parameterLength)) {
|
|
103 if ((i+1) < argc) {
|
|
104 slop = atoi(argv[i + 1]);
|
|
105 i++;
|
|
106 }
|
|
107 }
|
|
108 else if(PARAMETER_CHECK("-img", 4, parameterLength)) {
|
|
109 if ((i+1) < argc) {
|
|
110 imageType = argv[i + 1];
|
|
111 i++;
|
|
112 }
|
|
113 }
|
|
114 else {
|
|
115 cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
|
|
116 showHelp = true;
|
|
117 }
|
|
118 }
|
|
119
|
|
120 // make sure we have an input files
|
|
121 if (!haveBed ) {
|
|
122 cerr << endl << "*****" << endl << "*****ERROR: Need -i (BED) file. " << endl << "*****" << endl;
|
|
123 showHelp = true;
|
|
124 }
|
|
125 if (sortType != "none") {
|
|
126 if ((sortType != "base") && (sortType != "position") && (sortType != "strand") &&
|
|
127 (sortType != "quality") && (sortType != "sample") && (sortType != "readGroup")) {
|
|
128 cerr << endl << "*****" << endl << "*****ERROR: Invalid sort option. " << endl << "*****" << endl;
|
|
129 showHelp = true;
|
|
130 }
|
|
131 }
|
|
132 if (slop < 0) {
|
|
133 cerr << endl << "*****" << endl << "*****ERROR: Slop must be >= 0. " << endl << "*****" << endl;
|
|
134 showHelp = true;
|
|
135 }
|
|
136
|
|
137 if (!showHelp) {
|
|
138 BedFile *bed = new BedFile(bedFile);
|
|
139 DetermineBedInput(bed, imagePath, sortType, session, collapse, useNames, imageType, slop);
|
|
140 }
|
|
141 else {
|
|
142 ShowHelp();
|
|
143 }
|
|
144 }
|
|
145
|
|
146
|
|
147 void ShowHelp(void) {
|
|
148
|
|
149 cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
|
|
150
|
|
151 cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
|
|
152
|
|
153 cerr << "Summary: Creates a batch script to create IGV images " << endl;
|
|
154 cerr << " at each interval defined in a BED/GFF/VCF file." << endl << endl;
|
|
155
|
|
156 cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bed/gff/vcf>" << endl << endl;
|
|
157
|
|
158 cerr << "Options: " << endl;
|
|
159
|
|
160 cerr << "\t-path\t" << "The full path to which the IGV snapshots should be written." << endl;
|
|
161 cerr << "\t\t(STRING) Default: ./" << endl << endl;
|
|
162
|
|
163 cerr << "\t-sess\t" << "The full path to an existing IGV session file to be " << endl;
|
|
164 cerr << "\t\tloaded prior to taking snapshots." << endl << endl;
|
|
165 cerr << "\t\t(STRING) Default is for no session to be loaded." << endl << endl;
|
|
166
|
|
167 cerr << "\t-sort\t" << "The type of BAM sorting you would like to apply to each image. " << endl;
|
|
168 cerr << "\t\tOptions: base, position, strand, quality, sample, and readGroup" << endl;
|
|
169 cerr << "\t\tDefault is to apply no sorting at all." << endl << endl;
|
|
170
|
|
171 cerr << "\t-clps\t" << "Collapse the aligned reads prior to taking a snapshot. " << endl;
|
|
172 cerr << "\t\tDefault is to no collapse." << endl << endl;
|
|
173
|
|
174 cerr << "\t-name\t" << "Use the \"name\" field (column 4) for each image's filename. " << endl;
|
|
175 cerr << "\t\tDefault is to use the \"chr:start-pos.ext\"." << endl << endl;
|
|
176
|
|
177 cerr << "\t-slop\t" << "Number of flanking base pairs on the left & right of the image." << endl;
|
|
178 cerr << "\t\t- (INT) Default = 0." << endl << endl;
|
|
179
|
|
180 cerr << "\t-img\t" << "The type of image to be created. " << endl;
|
|
181 cerr << "\t\tOptions: png, eps, svg" << endl;
|
|
182 cerr << "\t\tDefault is png." << endl << endl;
|
|
183
|
|
184 cerr << "Notes: " << endl;
|
|
185 cerr << "\t(1) The resulting script is meant to be run from within the IGV GUI version 1.5 or later." << endl;
|
|
186 cerr << "\t(2) Unless you use the -sess option, it is assumed that prior to running the script, " << endl;
|
|
187 cerr << "\t\tyou have loaded the proper genome, tracks and data files." << endl << endl;
|
|
188
|
|
189
|
|
190 // end the program here
|
|
191 exit(1);
|
|
192 }
|
|
193
|
|
194
|
|
195 void DetermineBedInput(BedFile *bed, string path, string sortType, string session,
|
|
196 bool collapse, bool useNames, string imageType, int slop) {
|
|
197
|
|
198 // dealing with a proper file
|
|
199 if (bed->bedFile != "stdin") {
|
|
200
|
|
201 ifstream bedStream(bed->bedFile.c_str(), ios::in);
|
|
202 if ( !bedStream ) {
|
|
203 cerr << "Error: The requested bed file (" << bed->bedFile << ") could not be opened. Exiting!" << endl;
|
|
204 exit (1);
|
|
205 }
|
|
206 ProcessBed(bedStream, bed, path, sortType, session, collapse, useNames, imageType, slop);
|
|
207 }
|
|
208 // reading from stdin
|
|
209 else {
|
|
210 ProcessBed(cin, bed, path, sortType, session, collapse, useNames, imageType, slop);
|
|
211 }
|
|
212 }
|
|
213
|
|
214
|
|
215 void ProcessBed(istream &bedInput, BedFile *bed, string path, string sortType, string session,
|
|
216 bool collapse, bool useNames, string imageType, int slop) {
|
|
217
|
|
218 // set the image path
|
|
219 cout << "snapshotDirectory " << path << endl;
|
|
220
|
|
221 // should we load a session
|
|
222 if (session != "none")
|
|
223 cout << "load " << session << endl;
|
|
224
|
|
225
|
|
226 BED bedEntry, nullBed;
|
|
227 int lineNum = 0;
|
|
228 BedLineStatus bedStatus;
|
|
229
|
|
230 bed->Open();
|
|
231 // process each BED entry and convert to an IGV request
|
|
232 while ((bedStatus = bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {
|
|
233 if (bedStatus == BED_VALID) {
|
|
234
|
|
235 string filename = bedEntry.chrom + "_" + ToString(bedEntry.start) + "_" + ToString(bedEntry.end);
|
|
236 string locus = bedEntry.chrom + ":" + ToString(bedEntry.start - slop) + "-" + ToString(bedEntry.end + slop);
|
|
237
|
|
238 if (useNames == true) {
|
|
239 if (bedEntry.name.empty() == false)
|
|
240 filename = filename + "_" + bedEntry.name;
|
|
241 else {
|
|
242 cerr << "Error: You requested that filenames be based upon the name field. However, it appears to be empty. Exiting!" << endl;
|
|
243 exit (1);
|
|
244 }
|
|
245 }
|
|
246 if (slop > 0) {
|
|
247 filename = filename + "_" + "slop" + ToString(slop);
|
|
248 }
|
|
249 // goto
|
|
250 cout << "goto " << locus << endl;
|
|
251
|
|
252 // sort
|
|
253 if (sortType != "none")
|
|
254 cout << "sort " << sortType << endl;
|
|
255
|
|
256 // collapse
|
|
257 if (collapse == true)
|
|
258 cout << "collapse" << endl;
|
|
259
|
|
260 // snapshot
|
|
261 cout << "snapshot " << filename << "." << imageType << endl;
|
|
262
|
|
263 // reset
|
|
264 bedEntry = nullBed;
|
|
265 }
|
|
266 }
|
|
267 // close up
|
|
268 bed->Close();
|
|
269 }
|