Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/nucBed/nucBed.cpp @ 1:bec36315bd12 default tip
Deleted selected files
author | aaronquinlan |
---|---|
date | Sat, 19 Nov 2011 14:17:03 -0500 |
parents | dfcd8b6c1bda |
children |
line wrap: on
line diff
--- a/BEDTools-Version-2.14.3/src/nucBed/nucBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,158 +0,0 @@ -/***************************************************************************** - nucBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "nucBed.h" - - -NucBed::NucBed(string &dbFile, string &bedFile, bool printSeq, - bool hasPattern, const string &pattern, bool forceStrand) { - - _dbFile = dbFile; - _bedFile = bedFile; - _printSeq = printSeq; - _hasPattern = hasPattern; - _pattern = pattern; - _forceStrand = forceStrand; - - _bed = new BedFile(_bedFile); - - // Compute the DNA content in each BED/GFF/VCF interval - ProfileDNA(); -} - - -NucBed::~NucBed(void) -{} - - -void NucBed::ReportDnaProfile(const BED& bed, const string &sequence, int seqLength) -{ - int a,c,g,t,n,other,userPatternCount; - a = c = g = t = n = other = userPatternCount = 0; - - getDnaContent(sequence,a,c,g,t,n,other); - - if (_hasPattern) - userPatternCount = countPattern(sequence, _pattern); - - - // report the original interval - _bed->reportBedTab(bed); - // report AT and GC content - printf("%f\t%f\t",(float)(a+t)/seqLength, (float)(c+g)/seqLength); - // report raw nucleotide counts - printf("%d\t%d\t%d\t%d\t%d\t%d\t%d",a,c,g,t,n,other,seqLength); - // add the original sequence if requested. - - if (_printSeq) - printf("\t%s",sequence.c_str()); - if (_hasPattern) - printf("\t%d",userPatternCount); - printf("\n"); - -} - - -void NucBed::PrintHeader(void) { - printf("#"); - - int numOrigColumns = (int) _bed->bedType; - for (int i = 1; i <= numOrigColumns; ++i) { - printf("%d_usercol\t", i); - } - printf("%d_pct_at\t", numOrigColumns + 1); - printf("%d_pct_gc\t", numOrigColumns + 2); - printf("%d_num_A\t", numOrigColumns + 3); - printf("%d_num_C\t", numOrigColumns + 4); - printf("%d_num_G\t", numOrigColumns + 5); - printf("%d_num_T\t", numOrigColumns + 6); - printf("%d_num_N\t", numOrigColumns + 7); - printf("%d_num_oth\t", numOrigColumns + 8); - printf("%d_seq_len\t", numOrigColumns + 9); - - if (_printSeq) - printf("%d_seq", numOrigColumns + 10); - if (_hasPattern && !_printSeq) - printf("%d_user_patt_count", numOrigColumns + 10); - else if (_hasPattern && _printSeq) - printf("\t%d_user_patt_count", numOrigColumns + 11); - printf("\n"); - -} - - -//****************************************************************************** -// ExtractDNA -//****************************************************************************** -void NucBed::ProfileDNA() { - - /* Make sure that we can oen all of the files successfully*/ - - // open the fasta database for reading - ifstream faDb(_dbFile.c_str(), ios::in); - if ( !faDb ) { - cerr << "Error: The requested fasta database file (" << _dbFile << ") could not be opened. Exiting!" << endl; - exit (1); - } - - // open and memory-map genome file - FastaReference fr; - bool memmap = true; - fr.open(_dbFile, memmap); - - bool headerReported = false; - BED bed, nullBed; - int lineNum = 0; - BedLineStatus bedStatus; - string sequence; - - _bed->Open(); - while ((bedStatus = _bed->GetNextBed(bed, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { - if (headerReported == false) { - PrintHeader(); - headerReported = true; - } - // make sure we are extracting >= 1 bp - if (bed.zeroLength == false) { - size_t seqLength = fr.sequenceLength(bed.chrom); - // make sure this feature will not exceed the end of the chromosome. - if ( (bed.start <= seqLength) && (bed.end <= seqLength) ) - { - // grab the dna at this interval - int length = bed.end - bed.start; - // report the sequence's content - string dna = fr.getSubSequence(bed.chrom, bed.start, length); - // rev comp si necessaire - if ((_forceStrand == true) && (bed.strand == "-")) - reverseComplement(dna); - ReportDnaProfile(bed, dna, length); - bed = nullBed; - } - else - { - cerr << "Feature (" << bed.chrom << ":" << bed.start << "-" << bed.end << ") beyond the length of " - << bed.chrom << " size (" << seqLength << " bp). Skipping." << endl; - } - } - // handle zeroLength - else { - cerr << "Feature (" << bed.chrom << ":" << bed.start+1 << "-" << bed.end-1 << ") has length = 0, Skipping." << endl; - } - bed = nullBed; - } - } - _bed->Close(); -} - - -