Mercurial > repos > greg > ideas_preprocessor
comparison runproc.R @ 22:3651f1592f3f draft
Uploaded
| author | greg |
|---|---|
| date | Wed, 31 Jan 2018 14:13:11 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 21:99102499271a | 22:3651f1592f3f |
|---|---|
| 1 normdata<-function(case, control, method="nbinom") | |
| 2 { d=dnorm(-10:10,0,10); | |
| 3 u= c(rep(0,10),control,rep(0,10)); | |
| 4 l=length(case); | |
| 5 nu=rep(0,l); | |
| 6 for(i in 1:21) | |
| 7 { nu = nu + u[i-1+1:l] * d[i]; | |
| 8 } | |
| 9 nu = nu / sum(d); | |
| 10 | |
| 11 t = which(case<quantile(case,prob=0.99)) | |
| 12 p=mean(case[t])/var(case[t]); | |
| 13 if(p>0.9) p=0.9; | |
| 14 if(p<0.1) p=0.1; | |
| 15 n=mean(case[t])*p/(1-p); | |
| 16 print(c(n,p)); | |
| 17 #nc = log2((case*20+1)/(nu/mean(nu)*mean(case)*20+1)+1)-1; | |
| 18 if(method=="nbinom") | |
| 19 { nc = -pnbinom(case,size=c((nu+1)/mean(nu+1)*n),prob=p,lower.tail=F,log.p=T)/log(10); | |
| 20 } else | |
| 21 { nc = -ppois(case,(nu+1)/mean(nu+1)*mean(case),lower.tail=F,log.p=T)/log(10); | |
| 22 } | |
| 23 return(nc); | |
| 24 } | |
| 25 | |
| 26 binsz=200 | |
| 27 folder="me66/Input/" | |
| 28 mbed="/gpfs/group/yzz2/default/IDEAS_server/data/mm10.bed" | |
| 29 | |
| 30 #read file list | |
| 31 args<-commandArgs(trailingOnly=TRUE); | |
| 32 flist=args[1]; | |
| 33 x=as.matrix(read.table(flist)); | |
| 34 if(length(args) > 1) | |
| 35 { st=as.integer(args[2]); | |
| 36 ed=as.integer(args[3]); | |
| 37 } else | |
| 38 { st=1; | |
| 39 ed=dim(x)[1]; | |
| 40 } | |
| 41 l=paste(x[,1],x[,2]); | |
| 42 ll=unique(l); | |
| 43 id=rep(0,length(l)); | |
| 44 for(i in ll) | |
| 45 { t=which(l==i); | |
| 46 if(length(t)>1) | |
| 47 { id[t]=1:length(t); | |
| 48 } | |
| 49 } | |
| 50 | |
| 51 y=x[st:ed,1:3]; | |
| 52 if(dim(x)[2]==4) | |
| 53 { y=rbind(y,x[st:ed,c(1:2,4)]); } | |
| 54 t=match(unique(y[,3]),y[,3]); | |
| 55 k=rapply(as.list(y[t,3]),function(z){tail(unlist(gregexpr("\\/",z)),n=1);}); | |
| 56 y=cbind(y[t,],substr(y[t,3],k+1,1000)); | |
| 57 | |
| 58 #fetch data and process data to windows mean | |
| 59 for(i in 1:dim(y)[1]) | |
| 60 { fout = paste(folder, y[i,1],"_",y[i,2],"_",y[i,4],sep=""); | |
| 61 system(paste("wget -O",fout, y[i,3])); | |
| 62 tl = tail(unlist(gregexpr("\\.",fout)),n=1); | |
| 63 name = substr(fout,1,tl); | |
| 64 ftype = substr(fout, tl+1, 100000); | |
| 65 if(tolower(ftype) == "bam") | |
| 66 { system(paste("samtools index", fout)); | |
| 67 bw=paste(name,".bw",sep=""); | |
| 68 system(paste("bamCoverage --bam",fout,"-o",bw,"--binSize", binsz)); | |
| 69 } else | |
| 70 { bw=fout; | |
| 71 } | |
| 72 bd=paste(name,".bed",sep=""); | |
| 73 system(paste("/storage/home/yzz2/work/tools/bigWigAverageOverBed", bw, mbed, "stdout | cut -f5 >", bd)); | |
| 74 system(paste("gzip -f",bd)); | |
| 75 system(paste("rm ", name,".ba*",sep="")); | |
| 76 system(paste("rm ", name,".bw",sep="")); | |
| 77 } | |
| 78 | |
| 79 #normalize data if inputs are available, and prepare data file list | |
| 80 fname=NULL; | |
| 81 for(i in st:ed) | |
| 82 { k=match(x[i,3],y[,3]); | |
| 83 f1=paste(folder,y[k,1],"_",y[k,2],"_",substr(y[k,4],1,nchar(y[k,4])-4),".bed.gz",sep=""); | |
| 84 if(dim(x)[2]==4) | |
| 85 { k=match(x[i,4],y[,3]); | |
| 86 f0=paste(folder,y[k,1],"_",y[k,2],"_",substr(y[k,4],1,nchar(y[k,4])-4),".bed.gz",sep=""); | |
| 87 if(id[i]>0) | |
| 88 { nf=paste(folder,x[i,1],".",x[i,2],"_",id[i],".norm.bed",sep=""); | |
| 89 } else | |
| 90 { nf=paste(folder,x[i,1],".",x[i,2],".norm.bed",sep=""); | |
| 91 } | |
| 92 x1=as.matrix(read.table(f1)); | |
| 93 x0=as.matrix(read.table(f0)); | |
| 94 nx=normdata(x1,x0); | |
| 95 write.table(round(nx*100)/100,nf,quote=F,row.names=F,col.names=F); | |
| 96 system(paste("gzip -f",nf)); | |
| 97 fname=rbind(fname,c(x[i,1:2],paste(nf,".gz",sep=""))); | |
| 98 } else | |
| 99 { nf=paste(fold,x[i,1],".",x[i,2],".bed.gz",sep=""); | |
| 100 system(paste("mv", f1, nf)); | |
| 101 fname=rbind(fname,c(x[i,1:2],nf)); | |
| 102 } | |
| 103 } | |
| 104 | |
| 105 write.table(fname,paste(folder,"tmp_",st,"_",ed,".input",sep=""),quote=F,row.names=F,col.names=F); |
