Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/shuffleBed/shuffleBed.cpp @ 0:dfcd8b6c1bda
Uploaded
author | aaronquinlan |
---|---|
date | Thu, 03 Nov 2011 10:25:04 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfcd8b6c1bda |
---|---|
1 /***************************************************************************** | |
2 shuffleBed.cpp | |
3 | |
4 (c) 2009 - Aaron Quinlan | |
5 Hall Laboratory | |
6 Department of Biochemistry and Molecular Genetics | |
7 University of Virginia | |
8 aaronquinlan@gmail.com | |
9 | |
10 Licenced under the GNU General Public License 2.0 license. | |
11 ******************************************************************************/ | |
12 #include "lineFileUtilities.h" | |
13 #include "shuffleBed.h" | |
14 | |
15 | |
16 BedShuffle::BedShuffle(string &bedFile, string &genomeFile, string &excludeFile, string &includeFile, | |
17 bool haveSeed, bool haveExclude, bool haveInclude, bool sameChrom, | |
18 float overlapFraction, int seed) { | |
19 | |
20 _bedFile = bedFile; | |
21 _genomeFile = genomeFile; | |
22 _excludeFile = excludeFile; | |
23 _includeFile = includeFile; | |
24 _sameChrom = sameChrom; | |
25 _haveExclude = haveExclude; | |
26 _haveInclude = haveInclude; | |
27 _overlapFraction = overlapFraction; | |
28 _haveSeed = haveSeed; | |
29 | |
30 | |
31 // use the supplied seed for the random | |
32 // number generation if given. else, | |
33 // roll our own. | |
34 if (_haveSeed) { | |
35 _seed = seed; | |
36 srand(seed); | |
37 } | |
38 else { | |
39 // thanks to Rob Long for the tip. | |
40 _seed = (unsigned)time(0)+(unsigned)getpid(); | |
41 srand(_seed); | |
42 } | |
43 | |
44 _bed = new BedFile(bedFile); | |
45 _genome = new GenomeFile(genomeFile); | |
46 _chroms = _genome->getChromList(); | |
47 _numChroms = _genome->getNumberOfChroms(); | |
48 | |
49 if (_haveExclude) { | |
50 _exclude = new BedFile(excludeFile); | |
51 _exclude->loadBedFileIntoMap(); | |
52 } | |
53 | |
54 if (_haveInclude) { | |
55 _include = new BedFile(includeFile); | |
56 _include->loadBedFileIntoMapNoBin(); | |
57 | |
58 _numIncludeChroms = 0; | |
59 masterBedMapNoBin::const_iterator it = _include->bedMapNoBin.begin(); | |
60 masterBedMapNoBin::const_iterator itEnd = _include->bedMapNoBin.end(); | |
61 for(; it != itEnd; ++it) { | |
62 _includeChroms.push_back(it->first); | |
63 _numIncludeChroms++; | |
64 } | |
65 } | |
66 | |
67 if (_haveExclude == true && _haveInclude == false) | |
68 ShuffleWithExclusions(); | |
69 else if (_haveExclude == false && _haveInclude == true) | |
70 ShuffleWithInclusions(); | |
71 else | |
72 Shuffle(); | |
73 } | |
74 | |
75 | |
76 BedShuffle::~BedShuffle(void) { | |
77 | |
78 } | |
79 | |
80 | |
81 void BedShuffle::Shuffle() { | |
82 | |
83 int lineNum = 0; | |
84 BED bedEntry, nullBed; // used to store the current BED line from the BED file. | |
85 BedLineStatus bedStatus; | |
86 | |
87 _bed->Open(); | |
88 while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { | |
89 if (bedStatus == BED_VALID) { | |
90 ChooseLocus(bedEntry); | |
91 _bed->reportBedNewLine(bedEntry); | |
92 bedEntry = nullBed; | |
93 } | |
94 } | |
95 _bed->Close(); | |
96 } | |
97 | |
98 | |
99 | |
100 void BedShuffle::ShuffleWithExclusions() { | |
101 | |
102 int lineNum = 0; | |
103 BED bedEntry, nullBed; // used to store the current BED line from the BED file. | |
104 BedLineStatus bedStatus; | |
105 | |
106 _bed->Open(); | |
107 while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { | |
108 if (bedStatus == BED_VALID) { | |
109 // keep looking as long as the chosen | |
110 // locus happens to overlap with regions | |
111 // that the user wishes to exclude. | |
112 int tries = 0; | |
113 bool haveOverlap = false; | |
114 do | |
115 { | |
116 // choose a new locus | |
117 ChooseLocus(bedEntry); | |
118 haveOverlap = _exclude->FindOneOrMoreOverlapsPerBin(bedEntry.chrom, bedEntry.start, bedEntry.end, | |
119 bedEntry.strand, false, _overlapFraction); | |
120 tries++; | |
121 } while ((haveOverlap == true) && (tries <= MAX_TRIES)); | |
122 | |
123 | |
124 if (tries > MAX_TRIES) { | |
125 cerr << "Error, line " << lineNum << ": tried " << MAX_TRIES << " potential loci for entry, but could not avoid excluded regions. Ignoring entry and moving on." << endl; | |
126 } | |
127 else { | |
128 _bed->reportBedNewLine(bedEntry); | |
129 } | |
130 } | |
131 bedEntry = nullBed; | |
132 } | |
133 _bed->Close(); | |
134 } | |
135 | |
136 | |
137 void BedShuffle::ShuffleWithInclusions() { | |
138 | |
139 int lineNum = 0; | |
140 BED bedEntry, nullBed; // used to store the current BED line from the BED file. | |
141 BedLineStatus bedStatus; | |
142 | |
143 _bed->Open(); | |
144 while ((bedStatus = _bed->GetNextBed(bedEntry, lineNum)) != BED_INVALID) { | |
145 if (bedStatus == BED_VALID) { | |
146 // choose a new locus | |
147 ChooseLocusFromInclusionFile(bedEntry); | |
148 _bed->reportBedNewLine(bedEntry); | |
149 } | |
150 bedEntry = nullBed; | |
151 } | |
152 _bed->Close(); | |
153 } | |
154 | |
155 | |
156 void BedShuffle::ChooseLocus(BED &bedEntry) { | |
157 | |
158 string chrom = bedEntry.chrom; | |
159 CHRPOS start = bedEntry.start; | |
160 CHRPOS end = bedEntry.end; | |
161 CHRPOS length = end - start; | |
162 | |
163 string randomChrom; | |
164 CHRPOS randomStart; | |
165 CHRPOS chromSize; | |
166 | |
167 if (_sameChrom == false) { | |
168 randomChrom = _chroms[rand() % _numChroms]; | |
169 chromSize = _genome->getChromSize(randomChrom); | |
170 randomStart = rand() % chromSize; | |
171 bedEntry.chrom = randomChrom; | |
172 bedEntry.start = randomStart; | |
173 bedEntry.end = randomStart + length; | |
174 } | |
175 else { | |
176 chromSize = _genome->getChromSize(chrom); | |
177 randomStart = rand() % chromSize; | |
178 bedEntry.start = randomStart; | |
179 bedEntry.end = randomStart + length; | |
180 } | |
181 | |
182 // ensure that the chosen location doesn't go past | |
183 // the length of the chromosome. if so, keep looking | |
184 // for a new spot. | |
185 while (bedEntry.end > chromSize) { | |
186 if (_sameChrom == false) { | |
187 randomChrom = _chroms[rand() % _numChroms]; | |
188 chromSize = _genome->getChromSize(randomChrom); | |
189 randomStart = rand() % chromSize; | |
190 bedEntry.chrom = randomChrom; | |
191 bedEntry.start = randomStart; | |
192 bedEntry.end = randomStart + length; | |
193 } | |
194 else { | |
195 chromSize = _genome->getChromSize(chrom); | |
196 randomStart = rand() % chromSize; | |
197 bedEntry.start = randomStart; | |
198 bedEntry.end = randomStart + length; | |
199 } | |
200 } | |
201 } | |
202 | |
203 | |
204 void BedShuffle::ChooseLocusFromInclusionFile(BED &bedEntry) { | |
205 | |
206 string chrom = bedEntry.chrom; | |
207 CHRPOS length = bedEntry.end - bedEntry.start; | |
208 | |
209 string randomChrom; | |
210 CHRPOS randomStart; | |
211 BED includeInterval; | |
212 | |
213 if (_sameChrom == false) { | |
214 | |
215 // grab a random chromosome from the inclusion file. | |
216 randomChrom = _includeChroms[rand() % _numIncludeChroms]; | |
217 // get the number of inclusion intervals for that chrom | |
218 size_t size = _include->bedMapNoBin[randomChrom].size(); | |
219 // grab a random interval on the chosen chromosome. | |
220 size_t interval = rand() % size; | |
221 // retreive a ranom -incl interval on the selected chrom | |
222 includeInterval = _include->bedMapNoBin[randomChrom][interval]; | |
223 | |
224 bedEntry.chrom = randomChrom; | |
225 } | |
226 else { | |
227 // get the number of inclusion intervals for the original chrom | |
228 size_t size = _include->bedMapNoBin[chrom].size(); | |
229 // grab a random interval on the chosen chromosome. | |
230 includeInterval = _include->bedMapNoBin[chrom][rand() % size]; | |
231 } | |
232 | |
233 randomStart = includeInterval.start + rand() % (includeInterval.size()); | |
234 bedEntry.start = randomStart; | |
235 bedEntry.end = randomStart + length; | |
236 | |
237 // use recursion to ensure that the chosen location | |
238 // doesn't go past the end of the chrom | |
239 if (bedEntry.end > ((size_t) _genome->getChromSize(chrom))) { | |
240 //bedEntry.end = _genome->getChromSize(chrom); | |
241 ChooseLocusFromInclusionFile(bedEntry); | |
242 } | |
243 } | |
244 |