diff BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.h @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.h	Thu Nov 03 10:25:04 2011 -0400
@@ -0,0 +1,78 @@
+// ***************************************************************************
+// FastaIndex.h (c) 2010 Erik Garrison <erik.garrison@bc.edu>
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 5 February 2010 (EG)
+// ---------------------------------------------------------------------------
+
+#ifndef _FASTA_H
+#define _FASTA_H
+
+#include <map>
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <stdint.h>
+#include <stdio.h>
+#include <algorithm>
+#include "LargeFileSupport.h"
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include "split.h"
+#include <stdlib.h>
+#include <ctype.h>
+#include <unistd.h>
+
+using namespace std;
+
+class FastaIndexEntry {  // one index record describing a single sequence: name, length, byte offset and line geometry
+    friend ostream& operator<<(ostream& output, const FastaIndexEntry& e);  // stream output of one entry; only declared here, defined elsewhere
+    public:
+        FastaIndexEntry(string name, int length, long long offset, int line_blen, int line_len);  // parameter names match the data members below; presumably each initializes its namesake -- confirm in the implementation
+        FastaIndexEntry(void);  // default constructor; resulting field values are not visible in this header
+        ~FastaIndexEntry(void);
+        string name;  // sequence name
+        int length;  // length of sequence
+        long long offset;  // bytes offset of sequence from start of file
+        int line_blen;  // line length in bytes, sequence characters
+        int line_len;  // line length including newline
+        void clear(void);  // presumably resets the fields to an empty state; body not in this header -- confirm
+};
+
+class FastaIndex : public map<string, FastaIndexEntry> {  // string-keyed map of FastaIndexEntry (key presumably the sequence name -- confirm). NOTE(review): std::map has no virtual destructor, so never delete a FastaIndex through a map pointer
+    friend ostream& operator<<(ostream& output, FastaIndex& i);  // stream output of the whole index; only declared here, defined elsewhere
+    public:
+        FastaIndex(void);
+        ~FastaIndex(void);
+        vector<string> sequenceNames;  // sequence names held separately from the map keys; presumably preserves file order, which the sorted map cannot -- confirm
+        void indexReference(string refName);  // presumably builds the index by scanning the FASTA file named refName -- confirm
+        void readIndexFile(string fname);  // presumably populates the index from an existing index file fname -- confirm
+        void writeIndexFile(string fname);  // presumably writes the current index to fname -- confirm
+        ifstream indexFile;  // input file stream member; presumably the index file while it is being read -- confirm
+        FastaIndexEntry entry(string key);  // returns a FastaIndexEntry by value for key; behavior for a missing key is not visible in this header
+        void flushEntryToIndex(FastaIndexEntry& entry);  // presumably stores a completed entry into the index -- confirm
+        string indexFileExtension(void);  // returns a string, presumably the filename suffix used for index files -- confirm
+};
+
+class FastaReference {  // state for one reference FASTA file: a FILE*, an optional memory mapping and a FastaIndex*; method bodies are defined outside this header
+    public:
+        void open(string reffilename, bool usemmap = false);  // usemmap defaults to false; presumably selects mmap-based access and sets the members below -- confirm
+        bool usingmmap;  // false after construction
+        string filename;  // empty after construction; presumably set by open() -- confirm
+        FastaReference(void) : usingmmap(false), file(NULL), filemm(NULL), filesize(0), index(NULL) { }  // every pointer/size starts in a known empty state rather than indeterminate; initializer order follows declaration order
+        ~FastaReference(void);  // NOTE(review): defined elsewhere; confirm it tolerates NULL file/index when open() was never called
+        FILE* file;  // NULL after construction
+        void* filemm;  // NULL after construction; presumably the mapped file contents when usingmmap is true -- confirm
+        size_t filesize;  // 0 after construction; presumably the file size in bytes -- confirm
+        FastaIndex* index;  // NULL after construction; ownership is not visible in this header
+        vector<FastaIndexEntry> findSequencesStartingWith(string seqnameStart);  // returns entries by value; presumably those whose name begins with seqnameStart -- confirm
+        string getSequence(string seqname);  // returns the sequence text by value for seqname
+        // potentially useful for performance, investigate
+        // void getSequence(string seqname, string& sequence);
+        string getSubSequence(string seqname, int start, int length);  // portion of a sequence selected by start and length; coordinate base (0- or 1-based) is not visible here
+        string sequenceNameStartingWith(string seqnameStart);  // returns a single name; behavior for zero or multiple matches is not visible here
+        long unsigned int sequenceLength(string seqname);  // note: unsigned long return type, whereas FastaIndexEntry::length is int
+};
+
+#endif