Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp Thu Nov 03 10:25:04 2011 -0400 @@ -0,0 +1,244 @@ +/***************************************************************************** + shuffleBed.cpp + + (c) 2009 - Aaron Quinlan + Hall Laboratory + Department of Biochemistry and Molecular Genetics + University of Virginia + aaronquinlan@gmail.com + + Licenced under the GNU General Public License 2.0 license. +******************************************************************************/ +#include "lineFileUtilities.h" +#include "shuffleBed.h" + + +BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, string &includeFile, + bool haveSeed, bool haveExclude, bool haveInclude, bool sameChrom, + float overlapFraction, int seed) { + + _bedFile = bedFile; + _genomeFile = genomeFile; + _excludeFile = excludeFile; + _includeFile = includeFile; + _sameChrom = sameChrom; + _haveExclude = haveExclude; + _haveInclude = haveInclude; + _overlapFraction = overlapFraction; + _haveSeed = haveSeed; + + + // use the supplied seed for the random + // number generation if given. else, + // roll our own. + if (_haveSeed) { + _seed = seed; + srand(seed); + } + else { + // thanks to Rob Long for the tip. + _seed = (unsigned)time(0)+(unsigned)getpid(); + srand(_seed); + } + + _bed = new BedFile(bedFile); + _genome = new GenomeFile(genomeFile); + _chroms = _genome->getChromList(); + _numChroms = _genome->getNumberOfChroms(); + + if (_haveExclude) { + _exclude = new BedFile(excludeFile); + _exclude->loadBedFileIntoMap(); + } + + if (_haveInclude) { + _include = new BedFile(includeFile); + _include->loadBedFileIntoMapNoBin(); + + _numIncludeChroms = 0; + masterBedMapNoBin::const_iterator it = _include->bedMapNoBin.begin(); + masterBedMapNoBin::const_iterator itEnd = _include->bedMapNoBin.end(); + for(; it != itEnd; ++it) { + _includeChroms.push_back(it->first); + _numIncludeChroms++; + } + } + + if (_haveExclude == true && _haveInclude == false) + ShuffleWithExclusions(); + else if (_haveExclude == false && _haveInclude == true) + ShuffleWithInclusions(); + else + Shuffle(); +} + + +BedShuffle::~BedShuffle(void) { + +} + + +void BedShuffle::Shuffle() { + + int lineNum = 0; + BED bedEntry, nullBed; // used to store the current BED line from the BED file. + BedLineStatus bedStatus; + + _bed->Open(); + while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { + if (bedStatus == BED_VALID) { + ChooseLocus(bedEntry); + _bed->reportBedNewLine(bedEntry); + bedEntry = nullBed; + } + } + _bed->Close(); +} + + + +void BedShuffle::ShuffleWithExclusions() { + + int lineNum = 0; + BED bedEntry, nullBed; // used to store the current BED line from the BED file. + BedLineStatus bedStatus; + + _bed->Open(); + while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { + if (bedStatus == BED_VALID) { + // keep looking as long as the chosen + // locus happens to overlap with regions + // that the user wishes to exclude. + int tries = 0; + bool haveOverlap = false; + do + { + // choose a new locus + ChooseLocus(bedEntry); + haveOverlap = _exclude->FindOneOrMoreOverlapsPerBin(bedEntry.chrom, bedEntry.start, bedEntry.end, + bedEntry.strand, false, _overlapFraction); + tries++; + } while ((haveOverlap == true) && (tries <= MAX_TRIES)); + + + if (tries > MAX_TRIES) { + cerr << "Error, line " << lineNum << ": tried " << MAX_TRIES << " potential loci for entry, but could not avoid excluded regions. Ignoring entry and moving on." << endl; + } + else { + _bed->reportBedNewLine(bedEntry); + } + } + bedEntry = nullBed; + } + _bed->Close(); +} + + +void BedShuffle::ShuffleWithInclusions() { + + int lineNum = 0; + BED bedEntry, nullBed; // used to store the current BED line from the BED file. + BedLineStatus bedStatus; + + _bed->Open(); + while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { + if (bedStatus == BED_VALID) { + // choose a new locus + ChooseLocusFromInclusionFile(bedEntry); + _bed->reportBedNewLine(bedEntry); + } + bedEntry = nullBed; + } + _bed->Close(); +} + + +void BedShuffle::ChooseLocus(BED &bedEntry) { + + string chrom = bedEntry.chrom; + CHRPOS start = bedEntry.start; + CHRPOS end = bedEntry.end; + CHRPOS length = end - start; + + string randomChrom; + CHRPOS randomStart; + CHRPOS chromSize; + + if (_sameChrom == false) { + randomChrom = _chroms[rand() % _numChroms]; + chromSize = _genome->getChromSize(randomChrom); + randomStart = rand() % chromSize; + bedEntry.chrom = randomChrom; + bedEntry.start = randomStart; + bedEntry.end = randomStart + length; + } + else { + chromSize = _genome->getChromSize(chrom); + randomStart = rand() % chromSize; + bedEntry.start = randomStart; + bedEntry.end = randomStart + length; + } + + // ensure that the chosen location doesn't go past + // the length of the chromosome. if so, keep looking + // for a new spot. + while (bedEntry.end > chromSize) { + if (_sameChrom == false) { + randomChrom = _chroms[rand() % _numChroms]; + chromSize = _genome->getChromSize(randomChrom); + randomStart = rand() % chromSize; + bedEntry.chrom = randomChrom; + bedEntry.start = randomStart; + bedEntry.end = randomStart + length; + } + else { + chromSize = _genome->getChromSize(chrom); + randomStart = rand() % chromSize; + bedEntry.start = randomStart; + bedEntry.end = randomStart + length; + } + } +} + + +void BedShuffle::ChooseLocusFromInclusionFile(BED &bedEntry) { + + string chrom = bedEntry.chrom; + CHRPOS length = bedEntry.end - bedEntry.start; + + string randomChrom; + CHRPOS randomStart; + BED includeInterval; + + if (_sameChrom == false) { + + // grab a random chromosome from the inclusion file. + randomChrom = _includeChroms[rand() % _numIncludeChroms]; + // get the number of inclusion intervals for that chrom + size_t size = _include->bedMapNoBin[randomChrom].size(); + // grab a random interval on the chosen chromosome. + size_t interval = rand() % size; + // retreive a ranom -incl interval on the selected chrom + includeInterval = _include->bedMapNoBin[randomChrom][interval]; + + bedEntry.chrom = randomChrom; + } + else { + // get the number of inclusion intervals for the original chrom + size_t size = _include->bedMapNoBin[chrom].size(); + // grab a random interval on the chosen chromosome. + includeInterval = _include->bedMapNoBin[chrom][rand() % size]; + } + + randomStart = includeInterval.start + rand() % (includeInterval.size()); + bedEntry.start = randomStart; + bedEntry.end = randomStart + length; + + // use recursion to ensure that the chosen location + // doesn't go past the end of the chrom + if (bedEntry.end > ((size_t) _genome->getChromSize(chrom))) { + //bedEntry.end = _genome->getChromSize(chrom); + ChooseLocusFromInclusionFile(bedEntry); + } +} +