comparison BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfcd8b6c1bda
1 /*****************************************************************************
2 fastaFromBed.cpp
3
4 (c) 2009 - Aaron Quinlan
5 Hall Laboratory
6 Department of Biochemistry and Molecular Genetics
7 University of Virginia
8 aaronquinlan@gmail.com
9
10 Licenced under the GNU General Public License 2.0 license.
11 ******************************************************************************/
12 #include "lineFileUtilities.h"
13 #include "fastaFromBed.h"
14
15
16 Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile,
17 const string &fastaOutFile, bool useFasta, bool useStrand) {
18
19 _useName = useName;
20 _dbFile = dbFile;
21 _bedFile = bedFile;
22 _fastaOutFile = fastaOutFile;
23 _useFasta = useFasta;
24 _useStrand = useStrand;
25
26 _bed = new BedFile(_bedFile);
27
28 // Figure out what the output file should be.
29 if (fastaOutFile == "stdout") {
30 _faOut = &cout;
31 }
32 else {
33 // Make sure we can open the file.
34 ofstream fa(fastaOutFile.c_str(), ios::out);
35 if ( !fa ) {
36 cerr << "Error: The requested fasta output file (" << fastaOutFile << ") could not be opened. Exiting!" << endl;
37 exit (1);
38 }
39 else {
40 fa.close();
41 _faOut = new ofstream(fastaOutFile.c_str(), ios::out);
42 }
43 }
44
45 // Extract the requested intervals from the FASTA input file.
46 ExtractDNA();
47 }
48
49
50 Bed2Fa::~Bed2Fa(void) {
51 }
52
53
54 //******************************************************************************
55 // ReportDNA
56 //******************************************************************************
57 void Bed2Fa::ReportDNA(const BED &bed, string &dna) {
58
59 // revcomp if necessary. Thanks to Thomas Doktor.
60 if ((_useStrand == true) && (bed.strand == "-"))
61 reverseComplement(dna);
62
63 if (!(_useName)) {
64 if (_useFasta == true) {
65 if (_useStrand == true)
66 *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << endl << dna << endl;
67 else
68 *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << endl << dna << endl;
69 }
70 else {
71 if (_useStrand == true)
72 *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << "\t" << dna << endl;
73 else
74 *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "\t" << dna << endl;
75 }
76 }
77 else {
78 if (_useFasta == true)
79 *_faOut << ">" << bed.name << endl << dna << endl;
80 else
81 *_faOut << bed.name << "\t" << dna << endl;
82 }
83 }
84
85
86
87 //******************************************************************************
88 // ExtractDNA
89 //******************************************************************************
90 void Bed2Fa::ExtractDNA() {
91
92 /* Make sure that we can oen all of the files successfully*/
93
94 // open the fasta database for reading
95 ifstream faDb(_dbFile.c_str(), ios::in);
96 if ( !faDb ) {
97 cerr << "Error: The requested fasta database file (" << _dbFile << ") could not be opened. Exiting!" << endl;
98 exit (1);
99 }
100
101 // open and memory-map genome file
102 FastaReference *fr = new FastaReference;
103 bool memmap = true;
104 fr->open(_dbFile, memmap);
105
106 BED bed, nullBed;
107 int lineNum = 0;
108 BedLineStatus bedStatus;
109 string sequence;
110
111 _bed->Open();
112 while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) {
113 if (bedStatus == BED_VALID) {
114 // make sure we are extracting >= 1 bp
115 if (bed.zeroLength == false) {
116 size_t seqLength = fr->sequenceLength(bed.chrom);
117 // make sure this feature will not exceed the end of the chromosome.
118 if ( (bed.start <= seqLength) && (bed.end <= seqLength) )
119 {
120 int length = bed.end - bed.start;
121 sequence = fr->getSubSequence(bed.chrom, bed.start, length);
122 ReportDNA(bed, sequence);
123 }
124 else
125 {
126 cerr << "Feature (" << bed.chrom << ":" << bed.start << "-" << bed.end << ") beyond the length of "
127 << bed.chrom << " size (" << seqLength << " bp). Skipping." << endl;
128 }
129 }
130 // handle zeroLength
131 else {
132 cerr << "Feature (" << bed.chrom << ":" << bed.start+1 << "-" << bed.end-1 << ") has length = 0, Skipping." << endl;
133 }
134 bed = nullBed;
135 }
136 }
137 _bed->Close();
138 }
139
140
141