0
|
/*****************************************************************************
  genomeFile.cpp

  (c) 2009 - Aaron Quinlan
  Hall Laboratory
  Department of Biochemistry and Molecular Genetics
  University of Virginia
  aaronquinlan@gmail.com

  Licensed under the GNU General Public License 2.0 license.
******************************************************************************/
|
|
12 #include "lineFileUtilities.h"
|
|
13 #include "genomeFile.h"
|
|
14
|
|
15
|
|
16 GenomeFile::GenomeFile(const string &genomeFile) {
|
|
17 _genomeFile = genomeFile;
|
|
18 loadGenomeFileIntoMap();
|
|
19 }
|
|
20
|
|
21 GenomeFile::GenomeFile(const RefVector &genome) {
|
|
22 for (size_t i = 0; i < genome.size(); ++i) {
|
|
23 string chrom = genome[i].RefName;
|
|
24 int length = genome[i].RefLength;
|
|
25
|
|
26 _chromSizes[chrom] = length;
|
|
27 _chromList.push_back(chrom);
|
|
28 }
|
|
29 }
|
|
30
|
|
31 // Destructor
|
|
32 GenomeFile::~GenomeFile(void) {
|
|
33 }
|
|
34
|
|
35
|
|
36 void GenomeFile::loadGenomeFileIntoMap() {
|
|
37
|
|
38 string genomeLine;
|
|
39 int lineNum = 0;
|
|
40 vector<string> genomeFields; // vector for a GENOME entry
|
|
41
|
|
42 // open the GENOME file for reading
|
|
43 ifstream genome(_genomeFile.c_str(), ios::in);
|
|
44 if ( !genome ) {
|
|
45 cerr << "Error: The requested genome file (" << _genomeFile << ") could not be opened. Exiting!" << endl;
|
|
46 exit (1);
|
|
47 }
|
|
48
|
|
49 while (getline(genome, genomeLine)) {
|
|
50
|
|
51 Tokenize(genomeLine,genomeFields); // load the fields into the vector
|
|
52 lineNum++;
|
|
53
|
|
54 // ignore a blank line
|
|
55 if (genomeFields.size() > 0) {
|
|
56 if (genomeFields[0].find("#") == string::npos) {
|
|
57
|
|
58 // we need at least 2 columns
|
|
59 if (genomeFields.size() >= 2) {
|
|
60 char *p2End;
|
|
61 long c2;
|
|
62 // make sure the second column is numeric.
|
|
63 c2 = strtol(genomeFields[1].c_str(), &p2End, 10);
|
|
64
|
|
65 // strtol will set p2End to the start of the string if non-integral, base 10
|
|
66 if (p2End != genomeFields[1].c_str()) {
|
|
67 string chrom = genomeFields[0];
|
|
68 int size = atoi(genomeFields[1].c_str());
|
|
69 _chromSizes[chrom] = size;
|
|
70 _chromList.push_back(chrom);
|
|
71 }
|
|
72 }
|
|
73 else {
|
|
74 cerr << "Less than the req'd two fields were encountered in the genome file (" << _genomeFile << ")";
|
|
75 cerr << " at line " << lineNum << ". Exiting." << endl;
|
|
76 exit (1);
|
|
77 }
|
|
78 }
|
|
79 }
|
|
80 genomeFields.clear();
|
|
81 }
|
|
82 }
|
|
83
|
|
84
|
|
85 int GenomeFile::getChromSize(const string &chrom) {
|
|
86 chromToSizes::const_iterator chromIt = _chromSizes.find(chrom);
|
|
87 if (chromIt != _chromSizes.end())
|
|
88 return _chromSizes[chrom];
|
|
89 else
|
|
90 return -1; // chrom not found.
|
|
91 }
|
|
92
|
|
93 vector<string> GenomeFile::getChromList() {
|
|
94 return _chromList;
|
|
95 }
|
|
96
|
|
97 int GenomeFile::getNumberOfChroms() {
|
|
98 return _chromList.size();
|
|
99 }
|
|
100
|
|
101 string GenomeFile::getGenomeFileName() {
|
|
102 return _genomeFile;
|
|
103 }
|