Mercurial > repos > fubar > bigwig_outlier_bed
comparison bigwigtobed.python.txt @ 0:94fe59a127a0 draft default tip
planemo upload for repository https://github.com/jackh726/bigtools commit ce6b9f638ebcebcad5a5b10219f252962f30e5cc-dirty
| author | fubar |
|---|---|
| date | Mon, 01 Jul 2024 03:31:33 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:94fe59a127a0 |
|---|---|
| 1 #raw | |
| 2 """ | |
| 3 Ross Lazarus June 2024 for VGP | |
| 4 Bigwigs are great, but hard to reliably "see" small low coverage or small very high coverage regions. | |
| 5 Colouring in JB2 tracks will need a new plugin, so this code will find bigwig regions above and below a chosen percentile point. | |
| 6 0.99 and 0.01 work well in testing with a minimum span of 10 bp. | |
| 7 Multiple bigwigs **with the same reference** can be combined - bed segments will be named appropriately | |
| 8 Combining multiple references works but is silly because only display will rely on one reference so others will not be shown... | |
| 9 Tricksy numpy method from http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html | |
| 10 takes about 95 seconds for a 17MB test wiggle | |
| 11 JBrowse2 bed normally displays ignore the score, so could provide separate low/high bed file outputs as an option. | |
| 12 Update june 30 2024: wrote a 'no-build' plugin for beds to display red/blue if >0/<0 so those are used for scores | |
| 13 Bed interval naming must be short for JB2 but needs input bigwig name and (lo or hi). | |
| 14 """ | |
| 15 import argparse | |
| 16 import numpy as np | |
| 17 import pybigtools | |
| 18 import sys | |
| 19 from pathlib import Path | |
| 20 class findOut(): | |
| 21 def __init__(self, args): | |
| 22 self.bwnames=args.bigwig | |
| 23 self.bwlabels=args.bigwiglabels | |
| 24 self.bedwin=args.minwin | |
| 25 self.qlo=args.qlo | |
| 26 self.qhi=args.qhi | |
| 27 self.bedouthilo=args.bedouthilo | |
| 28 self.bedouthi=args.bedouthi | |
| 29 self.bedoutlo=args.bedoutlo | |
| 30 self.tableout = args.tableout | |
| 31 self.bedwin = args.minwin | |
| 32 self.qhi = args.qhi | |
| 33 self.qlo = args.qlo | |
| 34 self.makeBed() | |
| 35 def processVals(self, bw, isTop): | |
| 36 # http://gregoryzynda.com/python/numpy/contiguous/interval/2019/11/29/contiguous-regions.html | |
| 37 if isTop: | |
| 38 bwex = np.r_[False, bw >= self.bwtop, False] # extend with 0s | |
| 39 else: | |
| 40 bwex = np.r_[False, bw <= self.bwbot, False] | |
| 41 bwexd = np.diff(bwex) | |
| 42 bwexdnz = bwexd.nonzero()[0] | |
| 43 bwregions = np.reshape(bwexdnz, (-1,2)) | |
| 44 return bwregions | |
| 45 def writeBed(self, bed, bedfname): | |
| 46 """ | |
| 47 potentially multiple | |
| 48 """ | |
| 49 bed.sort() | |
| 50 beds = ['%s\t%d\t%d\t%s\t%d' % x for x in bed] | |
| 51 with open(bedfname, "w") as bedf: | |
| 52 bedf.write('\n'.join(beds)) | |
| 53 bedf.write('\n') | |
| 54 print('Wrote %d bed regions to %s' % (len(bed), bedfname)) | |
| 55 def makeBed(self): | |
| 56 bedhi = [] | |
| 57 bedlo = [] | |
| 58 bwlabels = self.bwlabels | |
| 59 bwnames = self.bwnames | |
| 60 print('bwnames=', bwnames, "bwlabs=", bwlabels) | |
| 61 for i, bwname in enumerate(bwnames): | |
| 62 bwlabel = bwlabels[i].replace(" ",'') | |
| 63 p = Path('in.bw') | |
| 64 p.symlink_to( bwname ) # required by pybigtools (!) | |
| 65 bwf = pybigtools.open('in.bw') | |
| 66 chrlist = bwf.chroms() | |
| 67 chrs = list(chrlist.keys()) | |
| 68 chrs.sort() | |
| 69 restab = ["contig\tn\tmean\tstd\tmin\tmax\tqtop\tqbot"] | |
| 70 for chr in chrs: | |
| 71 bw = bwf.values(chr) | |
| 72 bw = bw[~np.isnan(bw)] # some have NaN if parts of a contig not covered | |
| 73 if self.qhi is not None: | |
| 74 self.bwtop = np.quantile(bw, self.qhi) | |
| 75 bwhi = self.processVals(bw, isTop=True) | |
| 76 for i, seg in enumerate(bwhi): | |
| 77 if seg[1] - seg[0] >= self.bedwin: | |
| 78 bedhi.append((chr, seg[0], seg[1], '%s_hi' % (bwlabel), 1)) | |
| 79 if self.qlo is not None: | |
| 80 self.bwbot = np.quantile(bw, self.qlo) | |
| 81 bwlo = self.processVals(bw, isTop=False) | |
| 82 for i, seg in enumerate(bwlo): | |
| 83 if seg[1] - seg[0] >= self.bedwin: | |
| 84 bedlo.append((chr, seg[0], seg[1], '%s_lo' % (bwlabel), -1)) | |
| 85 bwmean = np.mean(bw) | |
| 86 bwstd = np.std(bw) | |
| 87 bwmax = np.max(bw) | |
| 88 nrow = np.size(bw) | |
| 89 bwmin = np.min(bw) | |
| 90 restab.append('%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f' % (chr,nrow,bwmean,bwstd,bwmin,bwmax,self.bwtop,self.bwbot)) | |
| 91 print('\n'.join(restab), '\n') | |
| 92 if self.tableout: | |
| 93 with open(self.tableout) as t: | |
| 94 t.write('\n'.join(restab)) | |
| 95 t.write('\n') | |
| 96 if self.bedoutlo: | |
| 97 if self.qlo: | |
| 98 self.writeBed(bedlo, self.bedoutlo) | |
| 99 if self.bedouthi: | |
| 100 if self.qhi: | |
| 101 self.writeBed(bedhi, self.bedouthi) | |
| 102 if self.bedouthilo: | |
| 103 allbed = bedlo + bedhi | |
| 104 self.writeBed(allbed, self.bedouthilo) | |
| 105 return restab | |
| 106 if __name__ == "__main__": | |
| 107 parser = argparse.ArgumentParser() | |
| 108 a = parser.add_argument | |
| 109 a('-m', '--minwin',default=10, type=int) | |
| 110 a('-l', '--qlo',default=None, type=float) | |
| 111 a('-i', '--qhi',default=None, type=float) | |
| 112 a('-w', '--bigwig', nargs='+') | |
| 113 a('-n', '--bigwiglabels', nargs='+') | |
| 114 a('-o', '--bedouthilo', default=None, help="optional high and low combined bed") | |
| 115 a('-u', '--bedouthi', default=None, help="optional high only bed") | |
| 116 a('-b', '--bedoutlo', default=None, help="optional low only bed") | |
| 117 a('-t', '--tableout', default=None) | |
| 118 args = parser.parse_args() | |
| 119 print('args=', args) | |
| 120 if not (args.bedouthilo or args.bedouthi or args.bedoutlo): | |
| 121 sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need a bed output choice - must be one of low only, high only or both combined") | |
| 122 sys.exit(2) | |
| 123 if not (args.qlo or args.qhi): | |
| 124 sys.stderr.write("bigwig_outlier_bed.py cannot usefully run - need one or both of quantile cutpoints qhi and qlo") | |
| 125 sys.exit(2) | |
| 126 restab = findOut(args) | |
| 127 if args.tableout: | |
| 128 with open(args.tableout, 'w') as tout: | |
| 129 tout.write('\n'.join(restab)) | |
| 130 tout.write('\n') | |
| 131 #end raw |
