annotate BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
1 /*****************************************************************************
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
2 genomeFile.cpp
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
3
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
4 (c) 2009 - Aaron Quinlan
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
5 Hall Laboratory
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
6 Department of Biochemistry and Molecular Genetics
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
7 University of Virginia
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
8 aaronquinlan@gmail.com
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
9
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
10 Licensed under the GNU General Public License 2.0 license.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
11 ******************************************************************************/
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
12 #include "lineFileUtilities.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
13 #include "genomeFile.h"
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
14
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
15
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
16 GenomeFile::GenomeFile(const string &genomeFile) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
17 _genomeFile = genomeFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
18 loadGenomeFileIntoMap();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
19 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
20
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
21 GenomeFile::GenomeFile(const RefVector &genome) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
22 for (size_t i = 0; i < genome.size(); ++i) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
23 string chrom = genome[i].RefName;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
24 int length = genome[i].RefLength;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
25
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
26 _chromSizes[chrom] = length;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
27 _chromList.push_back(chrom);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
28 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
29 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
30
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
31 // Destructor
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
32 GenomeFile::~GenomeFile(void) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
33 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
34
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
35
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
36 void GenomeFile::loadGenomeFileIntoMap() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
37
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
38 string genomeLine;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
39 int lineNum = 0;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
40 vector<string> genomeFields; // vector for a GENOME entry
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
41
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
42 // open the GENOME file for reading
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
43 ifstream genome(_genomeFile.c_str(), ios::in);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
44 if ( !genome ) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
45 cerr << "Error: The requested genome file (" << _genomeFile << ") could not be opened. Exiting!" << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
46 exit (1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
47 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
48
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
49 while (getline(genome, genomeLine)) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
50
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
51 Tokenize(genomeLine,genomeFields); // load the fields into the vector
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
52 lineNum++;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
53
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
54 // ignore a blank line
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
55 if (genomeFields.size() > 0) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
56 if (genomeFields[0].find("#") == string::npos) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
57
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
58 // we need at least 2 columns
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
59 if (genomeFields.size() >= 2) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
60 char *p2End;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
61 long c2;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
62 // make sure the second column is numeric.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
63 c2 = strtol(genomeFields[1].c_str(), &p2End, 10);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
64
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
65 // strtol will set p2End to the start of the string if non-integral, base 10
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
66 if (p2End != genomeFields[1].c_str()) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
67 string chrom = genomeFields[0];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
68 int size = atoi(genomeFields[1].c_str());
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
69 _chromSizes[chrom] = size;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
70 _chromList.push_back(chrom);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
71 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
72 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
73 else {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
74 cerr << "Less than the req'd two fields were encountered in the genome file (" << _genomeFile << ")";
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
75 cerr << " at line " << lineNum << ". Exiting." << endl;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
76 exit (1);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
77 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
78 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
79 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
80 genomeFields.clear();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
81 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
82 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
83
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
84
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
85 int GenomeFile::getChromSize(const string &chrom) {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
86 chromToSizes::const_iterator chromIt = _chromSizes.find(chrom);
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
87 if (chromIt != _chromSizes.end())
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
88 return _chromSizes[chrom];
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
89 else
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
90 return -1; // chrom not found.
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
91 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
92
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
93 vector<string> GenomeFile::getChromList() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
94 return _chromList;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
95 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
96
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
97 int GenomeFile::getNumberOfChroms() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
98 return _chromList.size();
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
99 }
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
100
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
101 string GenomeFile::getGenomeFileName() {
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
102 return _genomeFile;
dfcd8b6c1bda Uploaded
aaronquinlan
parents:
diff changeset
103 }