comparison BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp @ 0:dfcd8b6c1bda

Uploaded
author aaronquinlan
date Thu, 03 Nov 2011 10:25:04 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfcd8b6c1bda
1 /*****************************************************************************
2 shuffleBed.cpp
3
4 (c) 2009 - Aaron Quinlan
5 Hall Laboratory
6 Department of Biochemistry and Molecular Genetics
7 University of Virginia
8 aaronquinlan@gmail.com
9
10 Licenced under the GNU General Public License 2.0 license.
11 ******************************************************************************/
12 #include "lineFileUtilities.h"
13 #include "shuffleBed.h"
14
15
16 BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, string &includeFile,
17 bool haveSeed, bool haveExclude, bool haveInclude, bool sameChrom,
18 float overlapFraction, int seed) {
19
20 _bedFile = bedFile;
21 _genomeFile = genomeFile;
22 _excludeFile = excludeFile;
23 _includeFile = includeFile;
24 _sameChrom = sameChrom;
25 _haveExclude = haveExclude;
26 _haveInclude = haveInclude;
27 _overlapFraction = overlapFraction;
28 _haveSeed = haveSeed;
29
30
31 // use the supplied seed for the random
32 // number generation if given. else,
33 // roll our own.
34 if (_haveSeed) {
35 _seed = seed;
36 srand(seed);
37 }
38 else {
39 // thanks to Rob Long for the tip.
40 _seed = (unsigned)time(0)+(unsigned)getpid();
41 srand(_seed);
42 }
43
44 _bed = new BedFile(bedFile);
45 _genome = new GenomeFile(genomeFile);
46 _chroms = _genome->getChromList();
47 _numChroms = _genome->getNumberOfChroms();
48
49 if (_haveExclude) {
50 _exclude = new BedFile(excludeFile);
51 _exclude->loadBedFileIntoMap();
52 }
53
54 if (_haveInclude) {
55 _include = new BedFile(includeFile);
56 _include->loadBedFileIntoMapNoBin();
57
58 _numIncludeChroms = 0;
59 masterBedMapNoBin::const_iterator it = _include->bedMapNoBin.begin();
60 masterBedMapNoBin::const_iterator itEnd = _include->bedMapNoBin.end();
61 for(; it != itEnd; ++it) {
62 _includeChroms.push_back(it->first);
63 _numIncludeChroms++;
64 }
65 }
66
67 if (_haveExclude == true && _haveInclude == false)
68 ShuffleWithExclusions();
69 else if (_haveExclude == false && _haveInclude == true)
70 ShuffleWithInclusions();
71 else
72 Shuffle();
73 }
74
75
76 BedShuffle::~BedShuffle(void) {
77
78 }
79
80
81 void BedShuffle::Shuffle() {
82
83 int lineNum = 0;
84 BED bedEntry, nullBed; // used to store the current BED line from the BED file.
85 BedLineStatus bedStatus;
86
87 _bed->Open();
88 while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {
89 if (bedStatus == BED_VALID) {
90 ChooseLocus(bedEntry);
91 _bed->reportBedNewLine(bedEntry);
92 bedEntry = nullBed;
93 }
94 }
95 _bed->Close();
96 }
97
98
99
100 void BedShuffle::ShuffleWithExclusions() {
101
102 int lineNum = 0;
103 BED bedEntry, nullBed; // used to store the current BED line from the BED file.
104 BedLineStatus bedStatus;
105
106 _bed->Open();
107 while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {
108 if (bedStatus == BED_VALID) {
109 // keep looking as long as the chosen
110 // locus happens to overlap with regions
111 // that the user wishes to exclude.
112 int tries = 0;
113 bool haveOverlap = false;
114 do
115 {
116 // choose a new locus
117 ChooseLocus(bedEntry);
118 haveOverlap = _exclude->FindOneOrMoreOverlapsPerBin(bedEntry.chrom, bedEntry.start, bedEntry.end,
119 bedEntry.strand, false, _overlapFraction);
120 tries++;
121 } while ((haveOverlap == true) && (tries <= MAX_TRIES));
122
123
124 if (tries > MAX_TRIES) {
125 cerr << "Error, line " << lineNum << ": tried " << MAX_TRIES << " potential loci for entry, but could not avoid excluded regions. Ignoring entry and moving on." << endl;
126 }
127 else {
128 _bed->reportBedNewLine(bedEntry);
129 }
130 }
131 bedEntry = nullBed;
132 }
133 _bed->Close();
134 }
135
136
137 void BedShuffle::ShuffleWithInclusions() {
138
139 int lineNum = 0;
140 BED bedEntry, nullBed; // used to store the current BED line from the BED file.
141 BedLineStatus bedStatus;
142
143 _bed->Open();
144 while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) {
145 if (bedStatus == BED_VALID) {
146 // choose a new locus
147 ChooseLocusFromInclusionFile(bedEntry);
148 _bed->reportBedNewLine(bedEntry);
149 }
150 bedEntry = nullBed;
151 }
152 _bed->Close();
153 }
154
155
156 void BedShuffle::ChooseLocus(BED &bedEntry) {
157
158 string chrom = bedEntry.chrom;
159 CHRPOS start = bedEntry.start;
160 CHRPOS end = bedEntry.end;
161 CHRPOS length = end - start;
162
163 string randomChrom;
164 CHRPOS randomStart;
165 CHRPOS chromSize;
166
167 if (_sameChrom == false) {
168 randomChrom = _chroms[rand() % _numChroms];
169 chromSize = _genome->getChromSize(randomChrom);
170 randomStart = rand() % chromSize;
171 bedEntry.chrom = randomChrom;
172 bedEntry.start = randomStart;
173 bedEntry.end = randomStart + length;
174 }
175 else {
176 chromSize = _genome->getChromSize(chrom);
177 randomStart = rand() % chromSize;
178 bedEntry.start = randomStart;
179 bedEntry.end = randomStart + length;
180 }
181
182 // ensure that the chosen location doesn't go past
183 // the length of the chromosome. if so, keep looking
184 // for a new spot.
185 while (bedEntry.end > chromSize) {
186 if (_sameChrom == false) {
187 randomChrom = _chroms[rand() % _numChroms];
188 chromSize = _genome->getChromSize(randomChrom);
189 randomStart = rand() % chromSize;
190 bedEntry.chrom = randomChrom;
191 bedEntry.start = randomStart;
192 bedEntry.end = randomStart + length;
193 }
194 else {
195 chromSize = _genome->getChromSize(chrom);
196 randomStart = rand() % chromSize;
197 bedEntry.start = randomStart;
198 bedEntry.end = randomStart + length;
199 }
200 }
201 }
202
203
204 void BedShuffle::ChooseLocusFromInclusionFile(BED &bedEntry) {
205
206 string chrom = bedEntry.chrom;
207 CHRPOS length = bedEntry.end - bedEntry.start;
208
209 string randomChrom;
210 CHRPOS randomStart;
211 BED includeInterval;
212
213 if (_sameChrom == false) {
214
215 // grab a random chromosome from the inclusion file.
216 randomChrom = _includeChroms[rand() % _numIncludeChroms];
217 // get the number of inclusion intervals for that chrom
218 size_t size = _include->bedMapNoBin[randomChrom].size();
219 // grab a random interval on the chosen chromosome.
220 size_t interval = rand() % size;
221 // retreive a ranom -incl interval on the selected chrom
222 includeInterval = _include->bedMapNoBin[randomChrom][interval];
223
224 bedEntry.chrom = randomChrom;
225 }
226 else {
227 // get the number of inclusion intervals for the original chrom
228 size_t size = _include->bedMapNoBin[chrom].size();
229 // grab a random interval on the chosen chromosome.
230 includeInterval = _include->bedMapNoBin[chrom][rand() % size];
231 }
232
233 randomStart = includeInterval.start + rand() % (includeInterval.size());
234 bedEntry.start = randomStart;
235 bedEntry.end = randomStart + length;
236
237 // use recursion to ensure that the chosen location
238 // doesn't go past the end of the chrom
239 if (bedEntry.end > ((size_t) _genome->getChromSize(chrom))) {
240 //bedEntry.end = _genome->getChromSize(chrom);
241 ChooseLocusFromInclusionFile(bedEntry);
242 }
243 }
244