comparison BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfcd8b6c1bda
1 /*****************************************************************************
2 genomeFile.cpp
3
4 (c) 2009 - Aaron Quinlan
5 Hall Laboratory
6 Department of Biochemistry and Molecular Genetics
7 University of Virginia
8 aaronquinlan@gmail.com
9
10 Licensed under the GNU General Public License 2.0 license.
11 ******************************************************************************/
12 #include "lineFileUtilities.h"
13 #include "genomeFile.h"
14
15
16 GenomeFile::GenomeFile(const string &genomeFile) {
17 _genomeFile = genomeFile;
18 loadGenomeFileIntoMap();
19 }
20
21 GenomeFile::GenomeFile(const RefVector &genome) {
22 for (size_t i = 0; i < genome.size(); ++i) {
23 string chrom = genome[i].RefName;
24 int length = genome[i].RefLength;
25
26 _chromSizes[chrom] = length;
27 _chromList.push_back(chrom);
28 }
29 }
30
31 // Destructor
32 GenomeFile::~GenomeFile(void) {
33 }
34
35
36 void GenomeFile::loadGenomeFileIntoMap() {
37
38 string genomeLine;
39 int lineNum = 0;
40 vector<string> genomeFields; // vector for a GENOME entry
41
42 // open the GENOME file for reading
43 ifstream genome(_genomeFile.c_str(), ios::in);
44 if ( !genome ) {
45 cerr << "Error: The requested genome file (" << _genomeFile << ") could not be opened. Exiting!" << endl;
46 exit (1);
47 }
48
49 while (getline(genome, genomeLine)) {
50
51 Tokenize(genomeLine,genomeFields); // load the fields into the vector
52 lineNum++;
53
54 // ignore a blank line
55 if (genomeFields.size() > 0) {
56 if (genomeFields[0].find("#") == string::npos) {
57
58 // we need at least 2 columns
59 if (genomeFields.size() >= 2) {
60 char *p2End;
61 long c2;
62 // make sure the second column is numeric.
63 c2 = strtol(genomeFields[1].c_str(), &p2End, 10);
64
65 // strtol will set p2End to the start of the string if non-integral, base 10
66 if (p2End != genomeFields[1].c_str()) {
67 string chrom = genomeFields[0];
68 int size = atoi(genomeFields[1].c_str());
69 _chromSizes[chrom] = size;
70 _chromList.push_back(chrom);
71 }
72 }
73 else {
74 cerr << "Less than the req'd two fields were encountered in the genome file (" << _genomeFile << ")";
75 cerr << " at line " << lineNum << ". Exiting." << endl;
76 exit (1);
77 }
78 }
79 }
80 genomeFields.clear();
81 }
82 }
83
84
85 int GenomeFile::getChromSize(const string &chrom) {
86 chromToSizes::const_iterator chromIt = _chromSizes.find(chrom);
87 if (chromIt != _chromSizes.end())
88 return _chromSizes[chrom];
89 else
90 return -1; // chrom not found.
91 }
92
93 vector<string> GenomeFile::getChromList() {
94 return _chromList;
95 }
96
97 int GenomeFile::getNumberOfChroms() {
98 return _chromList.size();
99 }
100
101 string GenomeFile::getGenomeFileName() {
102 return _genomeFile;
103 }