Mercurial > repos > aaronquinlan > multi_intersect
diff BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp @ 1:bec36315bd12 default tip
Deleted selected files
author | aaronquinlan |
---|---|
date | Sat, 19 Nov 2011 14:17:03 -0500 |
parents | dfcd8b6c1bda |
children |
line wrap: on
line diff
--- a/BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp Thu Nov 03 10:25:04 2011 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,244 +0,0 @@ -/***************************************************************************** - shuffleBed.cpp - - (c) 2009 - Aaron Quinlan - Hall Laboratory - Department of Biochemistry and Molecular Genetics - University of Virginia - aaronquinlan@gmail.com - - Licenced under the GNU General Public License 2.0 license. -******************************************************************************/ -#include "lineFileUtilities.h" -#include "shuffleBed.h" - - -BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, string &includeFile, - bool haveSeed, bool haveExclude, bool haveInclude, bool sameChrom, - float overlapFraction, int seed) { - - _bedFile = bedFile; - _genomeFile = genomeFile; - _excludeFile = excludeFile; - _includeFile = includeFile; - _sameChrom = sameChrom; - _haveExclude = haveExclude; - _haveInclude = haveInclude; - _overlapFraction = overlapFraction; - _haveSeed = haveSeed; - - - // use the supplied seed for the random - // number generation if given. else, - // roll our own. - if (_haveSeed) { - _seed = seed; - srand(seed); - } - else { - // thanks to Rob Long for the tip. - _seed = (unsigned)time(0)+(unsigned)getpid(); - srand(_seed); - } - - _bed = new BedFile(bedFile); - _genome = new GenomeFile(genomeFile); - _chroms = _genome->getChromList(); - _numChroms = _genome->getNumberOfChroms(); - - if (_haveExclude) { - _exclude = new BedFile(excludeFile); - _exclude->loadBedFileIntoMap(); - } - - if (_haveInclude) { - _include = new BedFile(includeFile); - _include->loadBedFileIntoMapNoBin(); - - _numIncludeChroms = 0; - masterBedMapNoBin::const_iterator it = _include->bedMapNoBin.begin(); - masterBedMapNoBin::const_iterator itEnd = _include->bedMapNoBin.end(); - for(; it != itEnd; ++it) { - _includeChroms.push_back(it->first); - _numIncludeChroms++; - } - } - - if (_haveExclude == true && _haveInclude == false) - ShuffleWithExclusions(); - else if (_haveExclude == false && _haveInclude == true) - ShuffleWithInclusions(); - else - Shuffle(); -} - - -BedShuffle::~BedShuffle(void) { - -} - - -void BedShuffle::Shuffle() { - - int lineNum = 0; - BED bedEntry, nullBed; // used to store the current BED line from the BED file. - BedLineStatus bedStatus; - - _bed->Open(); - while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { - ChooseLocus(bedEntry); - _bed->reportBedNewLine(bedEntry); - bedEntry = nullBed; - } - } - _bed->Close(); -} - - - -void BedShuffle::ShuffleWithExclusions() { - - int lineNum = 0; - BED bedEntry, nullBed; // used to store the current BED line from the BED file. - BedLineStatus bedStatus; - - _bed->Open(); - while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { - // keep looking as long as the chosen - // locus happens to overlap with regions - // that the user wishes to exclude. - int tries = 0; - bool haveOverlap = false; - do - { - // choose a new locus - ChooseLocus(bedEntry); - haveOverlap = _exclude->FindOneOrMoreOverlapsPerBin(bedEntry.chrom, bedEntry.start, bedEntry.end, - bedEntry.strand, false, _overlapFraction); - tries++; - } while ((haveOverlap == true) && (tries <= MAX_TRIES)); - - - if (tries > MAX_TRIES) { - cerr << "Error, line " << lineNum << ": tried " << MAX_TRIES << " potential loci for entry, but could not avoid excluded regions. Ignoring entry and moving on." << endl; - } - else { - _bed->reportBedNewLine(bedEntry); - } - } - bedEntry = nullBed; - } - _bed->Close(); -} - - -void BedShuffle::ShuffleWithInclusions() { - - int lineNum = 0; - BED bedEntry, nullBed; // used to store the current BED line from the BED file. - BedLineStatus bedStatus; - - _bed->Open(); - while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { - if (bedStatus == BED_VALID) { - // choose a new locus - ChooseLocusFromInclusionFile(bedEntry); - _bed->reportBedNewLine(bedEntry); - } - bedEntry = nullBed; - } - _bed->Close(); -} - - -void BedShuffle::ChooseLocus(BED &bedEntry) { - - string chrom = bedEntry.chrom; - CHRPOS start = bedEntry.start; - CHRPOS end = bedEntry.end; - CHRPOS length = end - start; - - string randomChrom; - CHRPOS randomStart; - CHRPOS chromSize; - - if (_sameChrom == false) { - randomChrom = _chroms[rand() % _numChroms]; - chromSize = _genome->getChromSize(randomChrom); - randomStart = rand() % chromSize; - bedEntry.chrom = randomChrom; - bedEntry.start = randomStart; - bedEntry.end = randomStart + length; - } - else { - chromSize = _genome->getChromSize(chrom); - randomStart = rand() % chromSize; - bedEntry.start = randomStart; - bedEntry.end = randomStart + length; - } - - // ensure that the chosen location doesn't go past - // the length of the chromosome. if so, keep looking - // for a new spot. - while (bedEntry.end > chromSize) { - if (_sameChrom == false) { - randomChrom = _chroms[rand() % _numChroms]; - chromSize = _genome->getChromSize(randomChrom); - randomStart = rand() % chromSize; - bedEntry.chrom = randomChrom; - bedEntry.start = randomStart; - bedEntry.end = randomStart + length; - } - else { - chromSize = _genome->getChromSize(chrom); - randomStart = rand() % chromSize; - bedEntry.start = randomStart; - bedEntry.end = randomStart + length; - } - } -} - - -void BedShuffle::ChooseLocusFromInclusionFile(BED &bedEntry) { - - string chrom = bedEntry.chrom; - CHRPOS length = bedEntry.end - bedEntry.start; - - string randomChrom; - CHRPOS randomStart; - BED includeInterval; - - if (_sameChrom == false) { - - // grab a random chromosome from the inclusion file. - randomChrom = _includeChroms[rand() % _numIncludeChroms]; - // get the number of inclusion intervals for that chrom - size_t size = _include->bedMapNoBin[randomChrom].size(); - // grab a random interval on the chosen chromosome. - size_t interval = rand() % size; - // retreive a ranom -incl interval on the selected chrom - includeInterval = _include->bedMapNoBin[randomChrom][interval]; - - bedEntry.chrom = randomChrom; - } - else { - // get the number of inclusion intervals for the original chrom - size_t size = _include->bedMapNoBin[chrom].size(); - // grab a random interval on the chosen chromosome. - includeInterval = _include->bedMapNoBin[chrom][rand() % size]; - } - - randomStart = includeInterval.start + rand() % (includeInterval.size()); - bedEntry.start = randomStart; - bedEntry.end = randomStart + length; - - // use recursion to ensure that the chosen location - // doesn't go past the end of the chrom - if (bedEntry.end > ((size_t) _genome->getChromSize(chrom))) { - //bedEntry.end = _genome->getChromSize(chrom); - ChooseLocusFromInclusionFile(bedEntry); - } -} -