Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp @ 0:dfcd8b6c1bda
Uploaded
| author | aaronquinlan |
|---|---|
| date | Thu, 03 Nov 2011 10:25:04 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfcd8b6c1bda |
|---|---|
| 1 /***************************************************************************** | |
| 2 shuffleBed.cpp | |
| 3 | |
| 4 (c) 2009 - Aaron Quinlan | |
| 5 Hall Laboratory | |
| 6 Department of Biochemistry and Molecular Genetics | |
| 7 University of Virginia | |
| 8 aaronquinlan@gmail.com | |
| 9 | |
| 10 Licenced under the GNU General Public License 2.0 license. | |
| 11 ******************************************************************************/ | |
| 12 #include "lineFileUtilities.h" | |
| 13 #include "shuffleBed.h" | |
| 14 | |
| 15 | |
| 16 BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, string &includeFile, | |
| 17 bool haveSeed, bool haveExclude, bool haveInclude, bool sameChrom, | |
| 18 float overlapFraction, int seed) { | |
| 19 | |
| 20 _bedFile = bedFile; | |
| 21 _genomeFile = genomeFile; | |
| 22 _excludeFile = excludeFile; | |
| 23 _includeFile = includeFile; | |
| 24 _sameChrom = sameChrom; | |
| 25 _haveExclude = haveExclude; | |
| 26 _haveInclude = haveInclude; | |
| 27 _overlapFraction = overlapFraction; | |
| 28 _haveSeed = haveSeed; | |
| 29 | |
| 30 | |
| 31 // use the supplied seed for the random | |
| 32 // number generation if given. else, | |
| 33 // roll our own. | |
| 34 if (_haveSeed) { | |
| 35 _seed = seed; | |
| 36 srand(seed); | |
| 37 } | |
| 38 else { | |
| 39 // thanks to Rob Long for the tip. | |
| 40 _seed = (unsigned)time(0)+(unsigned)getpid(); | |
| 41 srand(_seed); | |
| 42 } | |
| 43 | |
| 44 _bed = new BedFile(bedFile); | |
| 45 _genome = new GenomeFile(genomeFile); | |
| 46 _chroms = _genome->getChromList(); | |
| 47 _numChroms = _genome->getNumberOfChroms(); | |
| 48 | |
| 49 if (_haveExclude) { | |
| 50 _exclude = new BedFile(excludeFile); | |
| 51 _exclude->loadBedFileIntoMap(); | |
| 52 } | |
| 53 | |
| 54 if (_haveInclude) { | |
| 55 _include = new BedFile(includeFile); | |
| 56 _include->loadBedFileIntoMapNoBin(); | |
| 57 | |
| 58 _numIncludeChroms = 0; | |
| 59 masterBedMapNoBin::const_iterator it = _include->bedMapNoBin.begin(); | |
| 60 masterBedMapNoBin::const_iterator itEnd = _include->bedMapNoBin.end(); | |
| 61 for(; it != itEnd; ++it) { | |
| 62 _includeChroms.push_back(it->first); | |
| 63 _numIncludeChroms++; | |
| 64 } | |
| 65 } | |
| 66 | |
| 67 if (_haveExclude == true && _haveInclude == false) | |
| 68 ShuffleWithExclusions(); | |
| 69 else if (_haveExclude == false && _haveInclude == true) | |
| 70 ShuffleWithInclusions(); | |
| 71 else | |
| 72 Shuffle(); | |
| 73 } | |
| 74 | |
| 75 | |
| 76 BedShuffle::~BedShuffle(void) { | |
| 77 | |
| 78 } | |
| 79 | |
| 80 | |
| 81 void BedShuffle::Shuffle() { | |
| 82 | |
| 83 int lineNum = 0; | |
| 84 BED bedEntry, nullBed; // used to store the current BED line from the BED file. | |
| 85 BedLineStatus bedStatus; | |
| 86 | |
| 87 _bed->Open(); | |
| 88 while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { | |
| 89 if (bedStatus == BED_VALID) { | |
| 90 ChooseLocus(bedEntry); | |
| 91 _bed->reportBedNewLine(bedEntry); | |
| 92 bedEntry = nullBed; | |
| 93 } | |
| 94 } | |
| 95 _bed->Close(); | |
| 96 } | |
| 97 | |
| 98 | |
| 99 | |
| 100 void BedShuffle::ShuffleWithExclusions() { | |
| 101 | |
| 102 int lineNum = 0; | |
| 103 BED bedEntry, nullBed; // used to store the current BED line from the BED file. | |
| 104 BedLineStatus bedStatus; | |
| 105 | |
| 106 _bed->Open(); | |
| 107 while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { | |
| 108 if (bedStatus == BED_VALID) { | |
| 109 // keep looking as long as the chosen | |
| 110 // locus happens to overlap with regions | |
| 111 // that the user wishes to exclude. | |
| 112 int tries = 0; | |
| 113 bool haveOverlap = false; | |
| 114 do | |
| 115 { | |
| 116 // choose a new locus | |
| 117 ChooseLocus(bedEntry); | |
| 118 haveOverlap = _exclude->FindOneOrMoreOverlapsPerBin(bedEntry.chrom, bedEntry.start, bedEntry.end, | |
| 119 bedEntry.strand, false, _overlapFraction); | |
| 120 tries++; | |
| 121 } while ((haveOverlap == true) && (tries <= MAX_TRIES)); | |
| 122 | |
| 123 | |
| 124 if (tries > MAX_TRIES) { | |
| 125 cerr << "Error, line " << lineNum << ": tried " << MAX_TRIES << " potential loci for entry, but could not avoid excluded regions. Ignoring entry and moving on." << endl; | |
| 126 } | |
| 127 else { | |
| 128 _bed->reportBedNewLine(bedEntry); | |
| 129 } | |
| 130 } | |
| 131 bedEntry = nullBed; | |
| 132 } | |
| 133 _bed->Close(); | |
| 134 } | |
| 135 | |
| 136 | |
| 137 void BedShuffle::ShuffleWithInclusions() { | |
| 138 | |
| 139 int lineNum = 0; | |
| 140 BED bedEntry, nullBed; // used to store the current BED line from the BED file. | |
| 141 BedLineStatus bedStatus; | |
| 142 | |
| 143 _bed->Open(); | |
| 144 while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { | |
| 145 if (bedStatus == BED_VALID) { | |
| 146 // choose a new locus | |
| 147 ChooseLocusFromInclusionFile(bedEntry); | |
| 148 _bed->reportBedNewLine(bedEntry); | |
| 149 } | |
| 150 bedEntry = nullBed; | |
| 151 } | |
| 152 _bed->Close(); | |
| 153 } | |
| 154 | |
| 155 | |
| 156 void BedShuffle::ChooseLocus(BED &bedEntry) { | |
| 157 | |
| 158 string chrom = bedEntry.chrom; | |
| 159 CHRPOS start = bedEntry.start; | |
| 160 CHRPOS end = bedEntry.end; | |
| 161 CHRPOS length = end - start; | |
| 162 | |
| 163 string randomChrom; | |
| 164 CHRPOS randomStart; | |
| 165 CHRPOS chromSize; | |
| 166 | |
| 167 if (_sameChrom == false) { | |
| 168 randomChrom = _chroms[rand() % _numChroms]; | |
| 169 chromSize = _genome->getChromSize(randomChrom); | |
| 170 randomStart = rand() % chromSize; | |
| 171 bedEntry.chrom = randomChrom; | |
| 172 bedEntry.start = randomStart; | |
| 173 bedEntry.end = randomStart + length; | |
| 174 } | |
| 175 else { | |
| 176 chromSize = _genome->getChromSize(chrom); | |
| 177 randomStart = rand() % chromSize; | |
| 178 bedEntry.start = randomStart; | |
| 179 bedEntry.end = randomStart + length; | |
| 180 } | |
| 181 | |
| 182 // ensure that the chosen location doesn't go past | |
| 183 // the length of the chromosome. if so, keep looking | |
| 184 // for a new spot. | |
| 185 while (bedEntry.end > chromSize) { | |
| 186 if (_sameChrom == false) { | |
| 187 randomChrom = _chroms[rand() % _numChroms]; | |
| 188 chromSize = _genome->getChromSize(randomChrom); | |
| 189 randomStart = rand() % chromSize; | |
| 190 bedEntry.chrom = randomChrom; | |
| 191 bedEntry.start = randomStart; | |
| 192 bedEntry.end = randomStart + length; | |
| 193 } | |
| 194 else { | |
| 195 chromSize = _genome->getChromSize(chrom); | |
| 196 randomStart = rand() % chromSize; | |
| 197 bedEntry.start = randomStart; | |
| 198 bedEntry.end = randomStart + length; | |
| 199 } | |
| 200 } | |
| 201 } | |
| 202 | |
| 203 | |
| 204 void BedShuffle::ChooseLocusFromInclusionFile(BED &bedEntry) { | |
| 205 | |
| 206 string chrom = bedEntry.chrom; | |
| 207 CHRPOS length = bedEntry.end - bedEntry.start; | |
| 208 | |
| 209 string randomChrom; | |
| 210 CHRPOS randomStart; | |
| 211 BED includeInterval; | |
| 212 | |
| 213 if (_sameChrom == false) { | |
| 214 | |
| 215 // grab a random chromosome from the inclusion file. | |
| 216 randomChrom = _includeChroms[rand() % _numIncludeChroms]; | |
| 217 // get the number of inclusion intervals for that chrom | |
| 218 size_t size = _include->bedMapNoBin[randomChrom].size(); | |
| 219 // grab a random interval on the chosen chromosome. | |
| 220 size_t interval = rand() % size; | |
| 221 // retreive a ranom -incl interval on the selected chrom | |
| 222 includeInterval = _include->bedMapNoBin[randomChrom][interval]; | |
| 223 | |
| 224 bedEntry.chrom = randomChrom; | |
| 225 } | |
| 226 else { | |
| 227 // get the number of inclusion intervals for the original chrom | |
| 228 size_t size = _include->bedMapNoBin[chrom].size(); | |
| 229 // grab a random interval on the chosen chromosome. | |
| 230 includeInterval = _include->bedMapNoBin[chrom][rand() % size]; | |
| 231 } | |
| 232 | |
| 233 randomStart = includeInterval.start + rand() % (includeInterval.size()); | |
| 234 bedEntry.start = randomStart; | |
| 235 bedEntry.end = randomStart + length; | |
| 236 | |
| 237 // use recursion to ensure that the chosen location | |
| 238 // doesn't go past the end of the chrom | |
| 239 if (bedEntry.end > ((size_t) _genome->getChromSize(chrom))) { | |
| 240 //bedEntry.end = _genome->getChromSize(chrom); | |
| 241 ChooseLocusFromInclusionFile(bedEntry); | |
| 242 } | |
| 243 } | |
| 244 |
