diff BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/genomeFile/genomeFile.cpp	Thu Nov 03 10:25:04 2011 -0400
@@ -0,0 +1,103 @@
+/*****************************************************************************
+  genomeFile.cpp
+
+  (c) 2009 - Aaron Quinlan
+  Hall Laboratory
+  Department of Biochemistry and Molecular Genetics
+  University of Virginia
+  aaronquinlan@gmail.com
+
+  Licensed under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "genomeFile.h"
+
+
+// Construct from a genome file on disk: remember the file name, then
+// load its contents via loadGenomeFileIntoMap().
+GenomeFile::GenomeFile(const string &genomeFile)
+    : _genomeFile(genomeFile)
+{
+    loadGenomeFileIntoMap();
+}
+
+// Construct from an in-memory reference list. For every entry, its
+// RefName -> RefLength pair is stored in _chromSizes and the name is
+// appended to _chromList, in the order the entries appear in `genome`.
+GenomeFile::GenomeFile(const RefVector &genome) {
+    const size_t numRefs = genome.size();
+    for (size_t idx = 0; idx != numRefs; ++idx) {
+        const string &refName = genome[idx].RefName;
+        const int refLength   = genome[idx].RefLength;
+
+        _chromSizes[refName] = refLength;
+        _chromList.push_back(refName);
+    }
+}
+
+// Destructor: the body is empty, nothing is released explicitly here.
+GenomeFile::~GenomeFile() {
+}
+
+
+/*
+   Parse the genome file named by _genomeFile and fill _chromSizes and
+   _chromList.
+
+   Every line is split into fields with Tokenize().
+     - Lines that yield no fields are skipped.
+     - Lines whose first field contains a '#' anywhere are skipped.
+     - Any other line must have at least two fields (name, size);
+       otherwise an error is printed and the program exits with status 1.
+     - A line whose second field gives strtol() nothing to parse as a
+       base-10 integer is silently ignored.
+
+   The program also exits with status 1 if the file cannot be opened.
+*/
+void GenomeFile::loadGenomeFileIntoMap() {
+
+    // open the GENOME file for reading
+    ifstream genome(_genomeFile.c_str(), ios::in);
+    if ( !genome ) {
+        cerr << "Error: The requested genome file (" << _genomeFile << ") could not be opened. Exiting!" << endl;
+        exit (1);
+    }
+
+    string genomeLine;
+    int lineNum = 0;
+    vector<string> genomeFields;            // fields of the current GENOME entry
+
+    while (getline(genome, genomeLine)) {
+
+        genomeFields.clear();               // hand Tokenize an empty vector for every line
+        Tokenize(genomeLine, genomeFields);
+        lineNum++;
+
+        // ignore a blank line
+        if (genomeFields.empty())
+            continue;
+
+        // ignore a line whose first field contains a '#'
+        if (genomeFields[0].find('#') != string::npos)
+            continue;
+
+        // we need at least 2 columns
+        if (genomeFields.size() < 2) {
+            cerr << "Less than the req'd two fields were encountered in the genome file (" << _genomeFile << ")";
+            cerr << " at line " << lineNum << ".  Exiting." << endl;
+            exit (1);
+        }
+
+        // make sure the second column is numeric: strtol leaves sizeEnd at
+        // the start of the string when it could not consume any digits.
+        const char *sizeText = genomeFields[1].c_str();
+        char *sizeEnd;
+        const long parsedSize = strtol(sizeText, &sizeEnd, 10);
+        if (sizeEnd == sizeText)
+            continue;
+
+        // Store the value strtol already produced instead of parsing the
+        // same text a second time with atoi. The map holds int sizes, so
+        // the long is narrowed by the cast.
+        const string &chrom = genomeFields[0];
+        _chromSizes[chrom]  = static_cast<int>(parsedSize);
+        _chromList.push_back(chrom);
+    }
+}
+
+
+// Return the size stored in _chromSizes for `chrom`, or -1 when `chrom`
+// is not a key in that map.
+int GenomeFile::getChromSize(const string &chrom) {
+    chromToSizes::const_iterator chromIt = _chromSizes.find(chrom);
+    if (chromIt == _chromSizes.end())
+        return -1;  // chrom not found.
+
+    // Read the value through the iterator we already have rather than
+    // performing a second lookup with operator[].
+    return chromIt->second;
+}
+
+// Return a copy of the stored chromosome name list (_chromList).
+vector<string> GenomeFile::getChromList() {
+    vector<string> chromNames = _chromList;
+    return chromNames;
+}
+
+// Return the number of entries in _chromList, converted to int.
+int GenomeFile::getNumberOfChroms() {
+    const int numChroms = static_cast<int>(_chromList.size());
+    return numChroms;
+}
+
+// Return a copy of the stored genome file name (_genomeFile).
+string GenomeFile::getGenomeFileName() {
+    string fileName = _genomeFile;
+    return fileName;
+}