0
|
// ***************************************************************************
// FastaIndex.h (c) 2010 Erik Garrison <erik.garrison@bc.edu>
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
// Last modified: 5 February 2010 (EG)
// ---------------------------------------------------------------------------
|
|
8
|
|
9 #ifndef _FASTA_H
|
|
10 #define _FASTA_H
|
|
11
|
|
12 #include <map>
|
|
13 #include <iostream>
|
|
14 #include <fstream>
|
|
15 #include <vector>
|
|
16 #include <stdint.h>
|
|
17 #include <stdio.h>
|
|
18 #include <algorithm>
|
|
19 #include "LargeFileSupport.h"
|
|
20 #include <sys/stat.h>
|
|
21 #include <sys/mman.h>
|
|
22 #include "split.h"
|
|
23 #include <stdlib.h>
|
|
24 #include <ctype.h>
|
|
25 #include <unistd.h>
|
|
26
|
|
27 using namespace std;
|
|
28
|
|
// A single record of a FASTA index: the name of one sequence together with
// the numbers needed to locate it inside the FASTA file.
//
// All member functions are only declared here; their definitions live
// outside this header.
class FastaIndexEntry {
    // Stream-insertion operator for one entry (takes the entry by const reference).
    friend std::ostream& operator<<(std::ostream& output, const FastaIndexEntry& e);

public:
    // ---- data ----------------------------------------------------------
    std::string name;    // sequence name
    int length;          // length of sequence
    long long offset;    // bytes offset of sequence from start of file
    int line_blen;       // line length in bytes, sequence characters
    int line_len;        // line length including newline

    // ---- construction / destruction --------------------------------------
    FastaIndexEntry();
    // Parameters carry the same names as the data members above.
    FastaIndexEntry(std::string name, int length, long long offset,
                    int line_blen, int line_len);
    ~FastaIndexEntry();

    // ---- operations --------------------------------------------------------
    // NOTE(review): presumably resets the fields of this entry -- confirm
    // against the definition, which is not visible in this header.
    void clear();
};
|
|
42
|
|
43 class FastaIndex : public map<string, FastaIndexEntry> {
|
|
44 friend ostream& operator<<(ostream& output, FastaIndex& i);
|
|
45 public:
|
|
46 FastaIndex(void);
|
|
47 ~FastaIndex(void);
|
|
48 vector<string> sequenceNames;
|
|
49 void indexReference(string refName);
|
|
50 void readIndexFile(string fname);
|
|
51 void writeIndexFile(string fname);
|
|
52 ifstream indexFile;
|
|
53 FastaIndexEntry entry(string key);
|
|
54 void flushEntryToIndex(FastaIndexEntry& entry);
|
|
55 string indexFileExtension(void);
|
|
56 };
|
|
57
|
|
58 class FastaReference {
|
|
59 public:
|
|
60 void open(string reffilename, bool usemmap = false);
|
|
61 bool usingmmap;
|
|
62 string filename;
|
|
63 FastaReference(void) : usingmmap(false) { }
|
|
64 ~FastaReference(void);
|
|
65 FILE* file;
|
|
66 void* filemm;
|
|
67 size_t filesize;
|
|
68 FastaIndex* index;
|
|
69 vector<FastaIndexEntry> findSequencesStartingWith(string seqnameStart);
|
|
70 string getSequence(string seqname);
|
|
71 // potentially useful for performance, investigate
|
|
72 // void getSequence(string seqname, string& sequence);
|
|
73 string getSubSequence(string seqname, int start, int length);
|
|
74 string sequenceNameStartingWith(string seqnameStart);
|
|
75 long unsigned int sequenceLength(string seqname);
|
|
76 };
|
|
77
|
|
78 #endif
|