Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/fastaFromBed/fastaFromBed.cpp @ 0:dfcd8b6c1bda
Uploaded
| author | aaronquinlan |
|---|---|
| date | Thu, 03 Nov 2011 10:25:04 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfcd8b6c1bda |
|---|---|
| 1 /***************************************************************************** | |
| 2 fastaFromBed.cpp | |
| 3 | |
| 4 (c) 2009 - Aaron Quinlan | |
| 5 Hall Laboratory | |
| 6 Department of Biochemistry and Molecular Genetics | |
| 7 University of Virginia | |
| 8 aaronquinlan@gmail.com | |
| 9 | |
| 10 Licenced under the GNU General Public License 2.0 license. | |
| 11 ******************************************************************************/ | |
| 12 #include "lineFileUtilities.h" | |
| 13 #include "fastaFromBed.h" | |
| 14 | |
| 15 | |
| 16 Bed2Fa::Bed2Fa(bool useName, const string &dbFile, const string &bedFile, | |
| 17 const string &fastaOutFile, bool useFasta, bool useStrand) { | |
| 18 | |
| 19 _useName = useName; | |
| 20 _dbFile = dbFile; | |
| 21 _bedFile = bedFile; | |
| 22 _fastaOutFile = fastaOutFile; | |
| 23 _useFasta = useFasta; | |
| 24 _useStrand = useStrand; | |
| 25 | |
| 26 _bed = new BedFile(_bedFile); | |
| 27 | |
| 28 // Figure out what the output file should be. | |
| 29 if (fastaOutFile == "stdout") { | |
| 30 _faOut = &cout; | |
| 31 } | |
| 32 else { | |
| 33 // Make sure we can open the file. | |
| 34 ofstream fa(fastaOutFile.c_str(), ios::out); | |
| 35 if ( !fa ) { | |
| 36 cerr << "Error: The requested fasta output file (" << fastaOutFile << ") could not be opened. Exiting!" << endl; | |
| 37 exit (1); | |
| 38 } | |
| 39 else { | |
| 40 fa.close(); | |
| 41 _faOut = new ofstream(fastaOutFile.c_str(), ios::out); | |
| 42 } | |
| 43 } | |
| 44 | |
| 45 // Extract the requested intervals from the FASTA input file. | |
| 46 ExtractDNA(); | |
| 47 } | |
| 48 | |
| 49 | |
| 50 Bed2Fa::~Bed2Fa(void) { | |
| 51 } | |
| 52 | |
| 53 | |
| 54 //****************************************************************************** | |
| 55 // ReportDNA | |
| 56 //****************************************************************************** | |
| 57 void Bed2Fa::ReportDNA(const BED &bed, string &dna) { | |
| 58 | |
| 59 // revcomp if necessary. Thanks to Thomas Doktor. | |
| 60 if ((_useStrand == true) && (bed.strand == "-")) | |
| 61 reverseComplement(dna); | |
| 62 | |
| 63 if (!(_useName)) { | |
| 64 if (_useFasta == true) { | |
| 65 if (_useStrand == true) | |
| 66 *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << endl << dna << endl; | |
| 67 else | |
| 68 *_faOut << ">" << bed.chrom << ":" << bed.start << "-" << bed.end << endl << dna << endl; | |
| 69 } | |
| 70 else { | |
| 71 if (_useStrand == true) | |
| 72 *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "(" << bed.strand << ")" << "\t" << dna << endl; | |
| 73 else | |
| 74 *_faOut << bed.chrom << ":" << bed.start << "-" << bed.end << "\t" << dna << endl; | |
| 75 } | |
| 76 } | |
| 77 else { | |
| 78 if (_useFasta == true) | |
| 79 *_faOut << ">" << bed.name << endl << dna << endl; | |
| 80 else | |
| 81 *_faOut << bed.name << "\t" << dna << endl; | |
| 82 } | |
| 83 } | |
| 84 | |
| 85 | |
| 86 | |
| 87 //****************************************************************************** | |
| 88 // ExtractDNA | |
| 89 //****************************************************************************** | |
| 90 void Bed2Fa::ExtractDNA() { | |
| 91 | |
| 92 /* Make sure that we can oen all of the files successfully*/ | |
| 93 | |
| 94 // open the fasta database for reading | |
| 95 ifstream faDb(_dbFile.c_str(), ios::in); | |
| 96 if ( !faDb ) { | |
| 97 cerr << "Error: The requested fasta database file (" << _dbFile << ") could not be opened. Exiting!" << endl; | |
| 98 exit (1); | |
| 99 } | |
| 100 | |
| 101 // open and memory-map genome file | |
| 102 FastaReference *fr = new FastaReference; | |
| 103 bool memmap = true; | |
| 104 fr->open(_dbFile, memmap); | |
| 105 | |
| 106 BED bed, nullBed; | |
| 107 int lineNum = 0; | |
| 108 BedLineStatus bedStatus; | |
| 109 string sequence; | |
| 110 | |
| 111 _bed->Open(); | |
| 112 while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) { | |
| 113 if (bedStatus == BED_VALID) { | |
| 114 // make sure we are extracting >= 1 bp | |
| 115 if (bed.zeroLength == false) { | |
| 116 size_t seqLength = fr->sequenceLength(bed.chrom); | |
| 117 // make sure this feature will not exceed the end of the chromosome. | |
| 118 if ( (bed.start <= seqLength) && (bed.end <= seqLength) ) | |
| 119 { | |
| 120 int length = bed.end - bed.start; | |
| 121 sequence = fr->getSubSequence(bed.chrom, bed.start, length); | |
| 122 ReportDNA(bed, sequence); | |
| 123 } | |
| 124 else | |
| 125 { | |
| 126 cerr << "Feature (" << bed.chrom << ":" << bed.start << "-" << bed.end << ") beyond the length of " | |
| 127 << bed.chrom << " size (" << seqLength << " bp). Skipping." << endl; | |
| 128 } | |
| 129 } | |
| 130 // handle zeroLength | |
| 131 else { | |
| 132 cerr << "Feature (" << bed.chrom << ":" << bed.start+1 << "-" << bed.end-1 << ") has length = 0, Skipping." << endl; | |
| 133 } | |
| 134 bed = nullBed; | |
| 135 } | |
| 136 } | |
| 137 _bed->Close(); | |
| 138 } | |
| 139 | |
| 140 | |
| 141 |
