Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.h @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.h Thu Nov 03 10:25:04 2011 -0400 @@ -0,0 +1,78 @@ +// *************************************************************************** +// FastaIndex.h (c) 2010 Erik Garrison <erik.garrison@bc.edu> +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 5 February 2010 (EG) +// --------------------------------------------------------------------------- + +#ifndef _FASTA_H +#define _FASTA_H + +#include <map> +#include <iostream> +#include <fstream> +#include <vector> +#include <stdint.h> +#include <stdio.h> +#include <algorithm> +#include "LargeFileSupport.h" +#include <sys/stat.h> +#include <sys/mman.h> +#include "split.h" +#include <stdlib.h> +#include <ctype.h> +#include <unistd.h> + +using namespace std; + +class FastaIndexEntry { + friend ostream& operator<<(ostream& output, const FastaIndexEntry& e); + public: + FastaIndexEntry(string name, int length, long long offset, int line_blen, int line_len); + FastaIndexEntry(void); + ~FastaIndexEntry(void); + string name; // sequence name + int length; // length of sequence + long long offset; // bytes offset of sequence from start of file + int line_blen; // line length in bytes, sequence characters + int line_len; // line length including newline + void clear(void); +}; + +class FastaIndex : public map<string, FastaIndexEntry> { + friend ostream& operator<<(ostream& output, FastaIndex& i); + public: + FastaIndex(void); + ~FastaIndex(void); + vector<string> sequenceNames; + void indexReference(string refName); + void readIndexFile(string fname); + void writeIndexFile(string fname); + ifstream indexFile; + FastaIndexEntry entry(string key); + void flushEntryToIndex(FastaIndexEntry& entry); + string indexFileExtension(void); +}; + +class FastaReference { + public: + void open(string reffilename, bool usemmap = false); + bool usingmmap; + string filename; + FastaReference(void) : usingmmap(false) { } + ~FastaReference(void); + FILE* file; + void* filemm; + size_t filesize; + FastaIndex* index; + vector<FastaIndexEntry> findSequencesStartingWith(string seqnameStart); + string getSequence(string seqname); + // potentially useful for performance, investigate + // void getSequence(string seqname, string& sequence); + string getSubSequence(string seqname, int start, int length); + string sequenceNameStartingWith(string seqnameStart); + long unsigned int sequenceLength(string seqname); +}; + +#endif