Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfcd8b6c1bda |
---|---|
1 /***************************************************************************** | |
2 fastaFromBed.cpp | |
3 | |
4 (c) 2009 - Aaron Quinlan | |
5 Hall Laboratory | |
6 Department of Biochemistry and Molecular Genetics | |
7 University of Virginia | |
8 aaronquinlan@gmail.com | |
9 | |
10 Licenced under the GNU General Public License 2.0 license. | |
11 ******************************************************************************/ | |
12 #include "lineFileUtilities.h" | |
13 #include "fastaFromBed.h" | |
14 | |
15 | |
16 Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile, | |
17 const string &fastaOutFile, bool useFasta, bool useStrand) { | |
18 | |
19 _useName = useName; | |
20 _dbFile = dbFile; | |
21 _bedFile = bedFile; | |
22 _fastaOutFile = fastaOutFile; | |
23 _useFasta = useFasta; | |
24 _useStrand = useStrand; | |
25 | |
26 _bed = new BedFile(_bedFile); | |
27 | |
28 // Figure out what the output file should be. | |
29 if (fastaOutFile == "stdout") { | |
30 _faOut = &cout; | |
31 } | |
32 else { | |
33 // Make sure we can open the file. | |
34 ofstream fa(fastaOutFile.c_str(), ios::out); | |
35 if ( !fa ) { | |
36 cerr << "Error: The requested fasta output file (" << fastaOutFile << ") could not be opened. Exiting!" << endl; | |
37 exit (1); | |
38 } | |
39 else { | |
40 fa.close(); | |
41 _faOut = new ofstream(fastaOutFile.c_str(), ios::out); | |
42 } | |
43 } | |
44 | |
45 // Extract the requested intervals from the FASTA input file. | |
46 ExtractDNA(); | |
47 } | |
48 | |
49 | |
50 Bed2Fa::~Bed2Fa(void) { | |
51 } | |
52 | |
53 | |
54 //****************************************************************************** | |
55 // ReportDNA | |
56 //****************************************************************************** | |
57 void Bed2Fa::ReportDNA(const BED &bed, string &dna) { | |
58 | |
59 // revcomp if necessary. Thanks to Thomas Doktor. | |
60 if ((_useStrand == true) && (bed.strand == "-")) | |
61 reverseComplement(dna); | |
62 | |
63 if (!(_useName)) { | |
64 if (_useFasta == true) { | |
65 if (_useStrand == true) | |
66 *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << endl << dna << endl; | |
67 else | |
68 *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << endl << dna << endl; | |
69 } | |
70 else { | |
71 if (_useStrand == true) | |
72 *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << "\t" << dna << endl; | |
73 else | |
74 *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "\t" << dna << endl; | |
75 } | |
76 } | |
77 else { | |
78 if (_useFasta == true) | |
79 *_faOut << ">" << bed.name << endl << dna << endl; | |
80 else | |
81 *_faOut << bed.name << "\t" << dna << endl; | |
82 } | |
83 } | |
84 | |
85 | |
86 | |
87 //****************************************************************************** | |
88 // ExtractDNA | |
89 //****************************************************************************** | |
90 void Bed2Fa::ExtractDNA() { | |
91 | |
92 /* Make sure that we can oen all of the files successfully*/ | |
93 | |
94 // open the fasta database for reading | |
95 ifstream faDb(_dbFile.c_str(), ios::in); | |
96 if ( !faDb ) { | |
97 cerr << "Error: The requested fasta database file (" << _dbFile << ") could not be opened. Exiting!" << endl; | |
98 exit (1); | |
99 } | |
100 | |
101 // open and memory-map genome file | |
102 FastaReference *fr = new FastaReference; | |
103 bool memmap = true; | |
104 fr->open(_dbFile, memmap); | |
105 | |
106 BED bed, nullBed; | |
107 int lineNum = 0; | |
108 BedLineStatus bedStatus; | |
109 string sequence; | |
110 | |
111 _bed->Open(); | |
112 while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) { | |
113 if (bedStatus == BED_VALID) { | |
114 // make sure we are extracting >= 1 bp | |
115 if (bed.zeroLength == false) { | |
116 size_t seqLength = fr->sequenceLength(bed.chrom); | |
117 // make sure this feature will not exceed the end of the chromosome. | |
118 if ( (bed.start <= seqLength) && (bed.end <= seqLength) ) | |
119 { | |
120 int length = bed.end - bed.start; | |
121 sequence = fr->getSubSequence(bed.chrom, bed.start, length); | |
122 ReportDNA(bed, sequence); | |
123 } | |
124 else | |
125 { | |
126 cerr << "Feature (" << bed.chrom << ":" << bed.start << "-" << bed.end << ") beyond the length of " | |
127 << bed.chrom << " size (" << seqLength << " bp). Skipping." << endl; | |
128 } | |
129 } | |
130 // handle zeroLength | |
131 else { | |
132 cerr << "Feature (" << bed.chrom << ":" << bed.start+1 << "-" << bed.end-1 << ") has length = 0, Skipping." << endl; | |
133 } | |
134 bed = nullBed; | |
135 } | |
136 } | |
137 _bed->Close(); | |
138 } | |
139 | |
140 | |
141 |