Mercurial > repos > iuc > cwpair2
comparison cwpair2_util.py @ 2:d856e96b8ce2 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cwpair2 commit ea49259a702be2cc2b70166770d09177019bb210
| author | iuc |
|---|---|
| date | Wed, 11 May 2016 04:00:46 -0400 |
| parents | afa47be0dc76 |
| children | 0533034acd09 |
Comparison legend: equal, deleted, inserted, replaced
| 1:dd727dec864f | 2:d856e96b8ce2 |
|---|---|
| 1 import bisect | 1 import bisect |
| 2 import csv | 2 import csv |
| 3 import os | 3 import os |
| 4 import sys | 4 import sys |
| 5 import traceback | 5 import traceback |
| 6 | |
| 6 import matplotlib | 7 import matplotlib |
| 7 matplotlib.use('Agg') | 8 matplotlib.use('Agg') # noqa |
| 8 from matplotlib import pyplot | 9 from matplotlib import pyplot |
| 9 | 10 |
| 10 # Data outputs | 11 # Data outputs |
| 11 DETAILS = 'D' | 12 DETAILS = 'D' |
| 12 MATCHED_PAIRS = 'MP' | 13 MATCHED_PAIRS = 'MP' |
| 45 | 46 |
| 46 def get_bin(self, x): | 47 def get_bin(self, x): |
| 47 """ | 48 """ |
| 48 Returns the bin in which a data point falls | 49 Returns the bin in which a data point falls |
| 49 """ | 50 """ |
| 50 return self.start + (x-self.start) // self.binsize * self.binsize + self.binsize/2.0 | 51 return self.start + (x - self.start) // self.binsize * self.binsize + self.binsize / 2.0 |
| 51 | 52 |
| 52 def add(self, x): | 53 def add(self, x): |
| 53 x = self.get_bin(x) | 54 x = self.get_bin(x) |
| 54 self.dist[x] = self.dist.get(x, 0) + 1 | 55 self.dist[x] = self.dist.get(x, 0) + 1 |
| 55 | 56 |
| 73 sys.stderr.write(msg) | 74 sys.stderr.write(msg) |
| 74 sys.exit(1) | 75 sys.exit(1) |
| 75 | 76 |
| 76 | 77 |
| 77 def distance(peak1, peak2): | 78 def distance(peak1, peak2): |
| 78 return (peak2[1]+peak2[2])/2 - (peak1[1]+peak1[2])/2 | 79 return (peak2[1] + peak2[2]) / 2 - (peak1[1] + peak1[2]) / 2 |
| 79 | 80 |
| 80 | 81 |
| 81 def gff_row(cname, start, end, score, source, type='.', strand='.', phase='.', attrs={}): | 82 def gff_row(cname, start, end, score, source, type='.', strand='.', phase='.', attrs={}): |
| 82 return (cname, source, type, start, end, score, strand, phase, gff_attrs(attrs)) | 83 return (cname, source, type, start, end, score, strand, phase, gff_attrs(attrs)) |
| 83 | 84 |
| 112 for peaks in chromosomes.values(): | 113 for peaks in chromosomes.values(): |
| 113 for peak in peaks: | 114 for peak in peaks: |
| 114 values.append(peak[3]) | 115 values.append(peak[3]) |
| 115 values.sort() | 116 values.sort() |
| 116 # Get 95% value | 117 # Get 95% value |
| 117 return values[int(len(values)*0.95)] | 118 return values[int(len(values) * 0.95)] |
| 118 | 119 |
| 119 | 120 |
| 120 def filter(chromosomes, threshold=0.05): | 121 def filter(chromosomes, threshold=0.05): |
| 121 """ | 122 """ |
| 122 Filters the peaks to those above a threshold. Threshold < 1.0 is interpreted | 123 Filters the peaks to those above a threshold. Threshold < 1.0 is interpreted |
| 147 dist.add(distance(peak, cpeak)) | 148 dist.add(distance(peak, cpeak)) |
| 148 return dist | 149 return dist |
| 149 | 150 |
| 150 | 151 |
| 151 def make_keys(crick): | 152 def make_keys(crick): |
| 152 return [(data[1] + data[2])//2 for data in crick] | 153 return [(data[1] + data[2]) // 2 for data in crick] |
| 153 | 154 |
| 154 | 155 |
| 155 def get_window(crick, peak, up_distance, down_distance, keys=None): | 156 def get_window(crick, peak, up_distance, down_distance, keys=None): |
| 156 """ | 157 """ |
| 157 Returns a window of all crick peaks within a distance of a watson peak. | 158 Returns a window of all crick peaks within a distance of a watson peak. |
| 188 | 189 |
| 189 | 190 |
| 190 def match_mode(window, peak, mode): | 191 def match_mode(window, peak, mode): |
| 191 if not window: | 192 if not window: |
| 192 return None | 193 return None |
| 193 return min(window, key=lambda cpeak: abs(distance(peak, cpeak)-mode)) | 194 return min(window, key=lambda cpeak: abs(distance(peak, cpeak) - mode)) |
| 194 | 195 |
| 195 METHODS = {'mode': match_mode, 'closest': match_closest, 'largest': match_largest} | 196 METHODS = {'mode': match_mode, 'closest': match_closest, 'largest': match_largest} |
| 196 | 197 |
| 197 | 198 |
| 198 def frequency_plot(freqs, fname, labels=[], title=''): | 199 def frequency_plot(freqs, fname, labels=[], title=''): |
| 349 cname, | 350 cname, |
| 350 match[1], | 351 match[1], |
| 351 match[2], | 352 match[2], |
| 352 match[3], '-', | 353 match[3], '-', |
| 353 midpoint, | 354 midpoint, |
| 354 peak[3]+match[3], | 355 peak[3] + match[3], |
| 355 d)) | 356 d)) |
| 356 i = bisect.bisect_left(keys, (match[1]+match[2])/2) | 357 i = bisect.bisect_left(keys, (match[1] + match[2]) / 2) |
| 357 del crick[i] | 358 del crick[i] |
| 358 del keys[i] | 359 del keys[i] |
| 359 else: | 360 else: |
| 360 if output_orphans: | 361 if output_orphans: |
| 361 orphan_output.writerow((cname, peak[0], peak[1], peak[2], peak[3])) | 362 orphan_output.writerow((cname, peak[0], peak[1], peak[2], peak[3])) |
