Mercurial > repos > fubar > bigwig_outlier_bed
comparison bigwig_outlier_bed.py @ 0:94fe59a127a0 draft default tip
planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
| author | fubar |
|---|---|
| date | Mon, 01 Jul 2024 03:31:33 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:94fe59a127a0 |
|---|---|
| 1 """ | |
| 2 Ross Lazarus June 2024 for VGP | |
| 3 | |
| 4 Bigwigs are great, but hard to reliably "see" small low coverage or small very high coverage regions. | |
| 5 Colouring in JB2 tracks will need a new plugin, so this code will find bigwig regions above and below a chosen percentile point. | |
| 6 0.99 and 0.01 work well in testing with a minimum span of 10 bp. | |
| 7 | |
| 8 Multiple bigwigs **with the same reference** can be combined - bed segments will be named appropriately | |
| 9 Combining multiple references works but is silly because only display will rely on one reference so others will not be shown... | |
| 10 | |
| 11 Tricksy numpy method from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html | |
| 12 takes about 95 seconds for a 17MB test wiggle | |
| 13 | |
| 14 JBrowse2 bed normally displays ignore the score, so could provide separate low/high bed file outputs as an option. | |
| 15 | |
| 16 Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores | |
| 17 Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi). | |
| 18 | |
| 19 """ | |
| 20 | |
| 21 | |
| 22 import argparse | |
| 23 import numpy as np | |
| 24 import pybigtools | |
| 25 import sys | |
| 26 | |
| 27 | |
| 28 from pathlib import Path | |
| 29 | |
| 30 class findOut(): | |
| 31 | |
| 32 def __init__(self, args): | |
| 33 self.bwnames=args.bigwig | |
| 34 self.bwlabels=args.bigwiglabels | |
| 35 self.bedwin=args.minwin | |
| 36 self.qlo=args.qlo | |
| 37 self.qhi=args.qhi | |
| 38 self.bedouthilo=args.bedouthilo | |
| 39 self.bedouthi=args.bedouthi | |
| 40 self.bedoutlo=args.bedoutlo | |
| 41 self.tableout = args.tableout | |
| 42 self.bedwin = args.minwin | |
| 43 self.qhi = args.qhi | |
| 44 self.qlo = args.qlo | |
| 45 self.makeBed() | |
| 46 | |
| 47 def processVals(self, bw, isTop): | |
| 48 # http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html | |
| 49 if isTop: | |
| 50 bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s | |
| 51 else: | |
| 52 bwex = np.r_[False, bw <= self.bwbot, False] | |
| 53 bwexd = np.diff(bwex) | |
| 54 bwexdnz = bwexd.nonzero()[0] | |
| 55 bwregions = np.reshape(bwexdnz, (-1,2)) | |
| 56 return bwregions | |
| 57 | |
| 58 def writeBed(self, bed, bedfname): | |
| 59 """ | |
| 60 potentially multiple | |
| 61 """ | |
| 62 bed.sort() | |
| 63 beds = ['%s\t%d\t%d\t%s\t%d' % x for x in bed] | |
| 64 with open(bedfname, "w") as bedf: | |
| 65 bedf.write('\n'.join(beds)) | |
| 66 bedf.write('\n') | |
| 67 print('Wrote %d bed regions to %s' % (len(bed), bedfname)) | |
| 68 | |
| 69 def makeBed(self): | |
| 70 bedhi = [] | |
| 71 bedlo = [] | |
| 72 bwlabels = self.bwlabels | |
| 73 bwnames = self.bwnames | |
| 74 print('bwnames=', bwnames, "bwlabs=", bwlabels) | |
| 75 for i, bwname in enumerate(bwnames): | |
| 76 bwlabel = bwlabels[i].replace(" ",'') | |
| 77 p = Path('in.bw') | |
| 78 p.symlink_to( bwname ) # required by pybigtools (!) | |
| 79 bwf = pybigtools.open('in.bw') | |
| 80 chrlist = bwf.chroms() | |
| 81 chrs = list(chrlist.keys()) | |
| 82 chrs.sort() | |
| 83 restab = ["contig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"] | |
| 84 for chr in chrs: | |
| 85 bw = bwf.values(chr) | |
| 86 bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered | |
| 87 if self.qhi is not None: | |
| 88 self.bwtop = np.quantile(bw, self.qhi) | |
| 89 bwhi = self.processVals(bw, isTop=True) | |
| 90 for i, seg in enumerate(bwhi): | |
| 91 if seg[1] - seg[0] >= self.bedwin: | |
| 92 bedhi.append((chr, seg[0], seg[1], '%s_hi' % (bwlabel), 1)) | |
| 93 if self.qlo is not None: | |
| 94 self.bwbot = np.quantile(bw, self.qlo) | |
| 95 bwlo = self.processVals(bw, isTop=False) | |
| 96 for i, seg in enumerate(bwlo): | |
| 97 if seg[1] - seg[0] >= self.bedwin: | |
| 98 bedlo.append((chr, seg[0], seg[1], '%s_lo' % (bwlabel), -1)) | |
| 99 bwmean = np.mean(bw) | |
| 100 bwstd = np.std(bw) | |
| 101 bwmax = np.max(bw) | |
| 102 nrow = np.size(bw) | |
| 103 bwmin = np.min(bw) | |
| 104 restab.append('%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f' % (chr,nrow,bwmean,bwstd,bwmin,bwmax,self.bwtop,self.bwbot)) | |
| 105 print('\n'.join(restab), '\n') | |
| 106 if self.tableout: | |
| 107 with open(self.tableout) as t: | |
| 108 t.write('\n'.join(restab)) | |
| 109 t.write('\n') | |
| 110 if self.bedoutlo: | |
| 111 if self.qlo: | |
| 112 self.writeBed(bedlo, self.bedoutlo) | |
| 113 if self.bedouthi: | |
| 114 if self.qhi: | |
| 115 self.writeBed(bedhi, self.bedouthi) | |
| 116 if self.bedouthilo: | |
| 117 allbed = bedlo + bedhi | |
| 118 self.writeBed(allbed, self.bedouthilo) | |
| 119 return restab | |
| 120 | |
| 121 | |
| 122 if __name__ == "__main__": | |
| 123 parser = argparse.ArgumentParser() | |
| 124 a = parser.add_argument | |
| 125 a('-m', '--minwin',default=10, type=int) | |
| 126 a('-l', '--qlo',default=None, type=float) | |
| 127 a('-i', '--qhi',default=None, type=float) | |
| 128 a('-w', '--bigwig', nargs='+') | |
| 129 a('-n', '--bigwiglabels', nargs='+') | |
| 130 a('-o', '--bedouthilo', default=None, help="optional high and low combined bed") | |
| 131 a('-u', '--bedouthi', default=None, help="optional high only bed") | |
| 132 a('-b', '--bedoutlo', default=None, help="optional low only bed") | |
| 133 a('-t', '--tableout', default=None) | |
| 134 args = parser.parse_args() | |
| 135 print('args=', args) | |
| 136 if not (args.bedouthilo or args.bedouthi or args.bedoutlo): | |
| 137 sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need a bed output choice - must be one of low only, high only or both combined") | |
| 138 sys.exit(2) | |
| 139 if not (args.qlo or args.qhi): | |
| 140 sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need one or both of quantile cutpoints qhi and qlo") | |
| 141 sys.exit(2) | |
| 142 restab = findOut(args) | |
| 143 if args.tableout: | |
| 144 with open(args.tableout, 'w') as tout: | |
| 145 tout.write('\n'.join(restab)) | |
| 146 tout.write('\n') | |
| 147 | |
| 148 |
