Mercurial > repos > greg > ideas_preprocessor
comparison runproc.R @ 22:3651f1592f3f draft
Uploaded
author | greg |
---|---|
date | Wed, 31 Jan 2018 14:13:11 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
21:99102499271a | 22:3651f1592f3f |
---|---|
1 normdata<-function(case, control, method="nbinom") | |
2 { d=dnorm(-10:10,0,10); | |
3 u= c(rep(0,10),control,rep(0,10)); | |
4 l=length(case); | |
5 nu=rep(0,l); | |
6 for(i in 1:21) | |
7 { nu = nu + u[i-1+1:l] * d[i]; | |
8 } | |
9 nu = nu / sum(d); | |
10 | |
11 t = which(case<quantile(case,prob=0.99)) | |
12 p=mean(case[t])/var(case[t]); | |
13 if(p>0.9) p=0.9; | |
14 if(p<0.1) p=0.1; | |
15 n=mean(case[t])*p/(1-p); | |
16 print(c(n,p)); | |
17 #nc = log2((case*20+1)/(nu/mean(nu)*mean(case)*20+1)+1)-1; | |
18 if(method=="nbinom") | |
19 { nc = -pnbinom(case,size=c((nu+1)/mean(nu+1)*n),prob=p,lower.tail=F,log.p=T)/log(10); | |
20 } else | |
21 { nc = -ppois(case,(nu+1)/mean(nu+1)*mean(case),lower.tail=F,log.p=T)/log(10); | |
22 } | |
23 return(nc); | |
24 } | |
25 | |
26 binsz=200 | |
27 folder="me66/Input/" | |
28 mbed="/gpfs/group/yzz2/default/IDEAS_server/data/mm10.bed" | |
29 | |
30 #read file list | |
31 args<-commandArgs(trailingOnly=TRUE); | |
32 flist=args[1]; | |
33 x=as.matrix(read.table(flist)); | |
34 if(length(args) > 1) | |
35 { st=as.integer(args[2]); | |
36 ed=as.integer(args[3]); | |
37 } else | |
38 { st=1; | |
39 ed=dim(x)[1]; | |
40 } | |
41 l=paste(x[,1],x[,2]); | |
42 ll=unique(l); | |
43 id=rep(0,length(l)); | |
44 for(i in ll) | |
45 { t=which(l==i); | |
46 if(length(t)>1) | |
47 { id[t]=1:length(t); | |
48 } | |
49 } | |
50 | |
51 y=x[st:ed,1:3]; | |
52 if(dim(x)[2]==4) | |
53 { y=rbind(y,x[st:ed,c(1:2,4)]); } | |
54 t=match(unique(y[,3]),y[,3]); | |
55 k=rapply(as.list(y[t,3]),function(z){tail(unlist(gregexpr("\\/",z)),n=1);}); | |
56 y=cbind(y[t,],substr(y[t,3],k+1,1000)); | |
57 | |
58 #fetch data and process data to windows mean | |
59 for(i in 1:dim(y)[1]) | |
60 { fout = paste(folder, y[i,1],"_",y[i,2],"_",y[i,4],sep=""); | |
61 system(paste("wget -O",fout, y[i,3])); | |
62 tl = tail(unlist(gregexpr("\\.",fout)),n=1); | |
63 name = substr(fout,1,tl); | |
64 ftype = substr(fout, tl+1, 100000); | |
65 if(tolower(ftype) == "bam") | |
66 { system(paste("samtools index", fout)); | |
67 bw=paste(name,".bw",sep=""); | |
68 system(paste("bamCoverage --bam",fout,"-o",bw,"--binSize", binsz)); | |
69 } else | |
70 { bw=fout; | |
71 } | |
72 bd=paste(name,".bed",sep=""); | |
73 system(paste("/storage/home/yzz2/work/tools/bigWigAverageOverBed", bw, mbed, "stdout | cut -f5 >", bd)); | |
74 system(paste("gzip -f",bd)); | |
75 system(paste("rm ", name,".ba*",sep="")); | |
76 system(paste("rm ", name,".bw",sep="")); | |
77 } | |
78 | |
79 #normalize data if inputs are available, and prepare data file list | |
80 fname=NULL; | |
81 for(i in st:ed) | |
82 { k=match(x[i,3],y[,3]); | |
83 f1=paste(folder,y[k,1],"_",y[k,2],"_",substr(y[k,4],1,nchar(y[k,4])-4),".bed.gz",sep=""); | |
84 if(dim(x)[2]==4) | |
85 { k=match(x[i,4],y[,3]); | |
86 f0=paste(folder,y[k,1],"_",y[k,2],"_",substr(y[k,4],1,nchar(y[k,4])-4),".bed.gz",sep=""); | |
87 if(id[i]>0) | |
88 { nf=paste(folder,x[i,1],".",x[i,2],"_",id[i],".norm.bed",sep=""); | |
89 } else | |
90 { nf=paste(folder,x[i,1],".",x[i,2],".norm.bed",sep=""); | |
91 } | |
92 x1=as.matrix(read.table(f1)); | |
93 x0=as.matrix(read.table(f0)); | |
94 nx=normdata(x1,x0); | |
95 write.table(round(nx*100)/100,nf,quote=F,row.names=F,col.names=F); | |
96 system(paste("gzip -f",nf)); | |
97 fname=rbind(fname,c(x[i,1:2],paste(nf,".gz",sep=""))); | |
98 } else | |
99 { nf=paste(fold,x[i,1],".",x[i,2],".bed.gz",sep=""); | |
100 system(paste("mv", f1, nf)); | |
101 fname=rbind(fname,c(x[i,1:2],nf)); | |
102 } | |
103 } | |
104 | |
105 write.table(fname,paste(folder,"tmp_",st,"_",ed,".input",sep=""),quote=F,row.names=F,col.names=F); |