Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.cpp Thu Nov 03 10:25:04 2011 -0400 @@ -0,0 +1,141 @@ +/***************************************************************************** + fastaFromBed.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "fastaFromBed.h" + + +Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile, + const string &fastaOutFile, bool useFasta, bool useStrand) { + + _useName = useName; + _dbFile = dbFile; + _bedFile = bedFile; + _fastaOutFile = fastaOutFile; + _useFasta = useFasta; + _useStrand = useStrand; + + _bed = new BedFile(_bedFile); + + // Figure out what the output file should be. + if (fastaOutFile == "stdout") { + _faOut = &cout; + } + else { + // Make sure we can open the file. + ofstream fa(fastaOutFile.c_str(), ios::out); + if ( !fa ) { + cerr << "Error: The requested fasta output file (" << fastaOutFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + else { + fa.close(); + _faOut = new ofstream(fastaOutFile.c_str(), ios::out); + } + } + + // Extract the requested intervals from the FASTA input file. + ExtractDNA(); +} + + +Bed2Fa::~Bed2Fa(void) { +} + + +//****************************************************************************** +// ReportDNA +//****************************************************************************** +void Bed2Fa::ReportDNA(const BED &bed, string &dna) { + + // revcomp if necessary. Thanks to Thomas Doktor. + if ((_useStrand == true) && (bed.strand == "-")) + reverseComplement(dna); + + if (!(_useName)) { + if (_useFasta == true) { + if (_useStrand == true) + *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << endl << dna << endl; + else + *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << endl << dna << endl; + } + else { + if (_useStrand == true) + *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << "\t" << dna << endl; + else + *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "\t" << dna << endl; + } + } + else { + if (_useFasta == true) + *_faOut << ">" << bed.name << endl << dna << endl; + else + *_faOut << bed.name << "\t" << dna << endl; + } +} + + + +//****************************************************************************** +// ExtractDNA +//****************************************************************************** +void Bed2Fa::ExtractDNA() { + + /* Make sure that we can oen all of the files successfully*/ + + // open the fasta database for reading + ifstream faDb(_dbFile.c_str(), ios::in); + if ( !faDb ) { + cerr << "Error: The requested fasta database file (" << _dbFile << ") could not be opened. Exiting!" << endl; + exit (1); + } + + // open and memory-map genome file + FastaReference *fr = new FastaReference; + bool memmap = true; + fr->open(_dbFile, memmap); + + BED bed, nullBed; + int lineNum = 0; + BedLineStatus bedStatus; + string sequence; + + _bed->Open(); + while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) { + if (bedStatus == BED_VALID) { + // make sure we are extracting >= 1 bp + if (bed.zeroLength == false) { + size_t seqLength = fr->sequenceLength(bed.chrom); + // make sure this feature will not exceed the end of the chromosome. + if ( (bed.start <= seqLength) && (bed.end <= seqLength) ) + { + int length = bed.end - bed.start; + sequence = fr->getSubSequence(bed.chrom, bed.start, length); + ReportDNA(bed, sequence); + } + else + { + cerr << "Feature (" << bed.chrom << ":" << bed.start << "-" << bed.end << ") beyond the length of " + << bed.chrom << " size (" << seqLength << " bp). Skipping." << endl; + } + } + // handle zeroLength + else { + cerr << "Feature (" << bed.chrom << ":" << bed.start+1 << "-" << bed.end-1 << ") has length = 0, Skipping." << endl; + } + bed = nullBed; + } + } + _bed->Close(); +} + + +