comparison runproc.R @ 22:3651f1592f3f draft

Uploaded
author greg
date Wed, 31 Jan 2018 14:13:11 -0500
parents
children
comparison
equal deleted inserted replaced
21:99102499271a 22:3651f1592f3f
1 normdata<-function(case, control, method="nbinom")
2 { d=dnorm(-10:10,0,10);
3 u= c(rep(0,10),control,rep(0,10));
4 l=length(case);
5 nu=rep(0,l);
6 for(i in 1:21)
7 { nu = nu + u[i-1+1:l] * d[i];
8 }
9 nu = nu / sum(d);
10
11 t = which(case<quantile(case,prob=0.99))
12 p=mean(case[t])/var(case[t]);
13 if(p>0.9) p=0.9;
14 if(p<0.1) p=0.1;
15 n=mean(case[t])*p/(1-p);
16 print(c(n,p));
17 #nc = log2((case*20+1)/(nu/mean(nu)*mean(case)*20+1)+1)-1;
18 if(method=="nbinom")
19 { nc = -pnbinom(case,size=c((nu+1)/mean(nu+1)*n),prob=p,lower.tail=F,log.p=T)/log(10);
20 } else
21 { nc = -ppois(case,(nu+1)/mean(nu+1)*mean(case),lower.tail=F,log.p=T)/log(10);
22 }
23 return(nc);
24 }
25
26 binsz=200
27 folder="me66/Input/"
28 mbed="/gpfs/group/yzz2/default/IDEAS_server/data/mm10.bed"
29
30 #read file list
31 args<-commandArgs(trailingOnly=TRUE);
32 flist=args[1];
33 x=as.matrix(read.table(flist));
34 if(length(args) > 1)
35 { st=as.integer(args[2]);
36 ed=as.integer(args[3]);
37 } else
38 { st=1;
39 ed=dim(x)[1];
40 }
41 l=paste(x[,1],x[,2]);
42 ll=unique(l);
43 id=rep(0,length(l));
44 for(i in ll)
45 { t=which(l==i);
46 if(length(t)>1)
47 { id[t]=1:length(t);
48 }
49 }
50
51 y=x[st:ed,1:3];
52 if(dim(x)[2]==4)
53 { y=rbind(y,x[st:ed,c(1:2,4)]); }
54 t=match(unique(y[,3]),y[,3]);
55 k=rapply(as.list(y[t,3]),function(z){tail(unlist(gregexpr("\\/",z)),n=1);});
56 y=cbind(y[t,],substr(y[t,3],k+1,1000));
57
58 #fetch data and process data to windows mean
59 for(i in 1:dim(y)[1])
60 { fout = paste(folder, y[i,1],"_",y[i,2],"_",y[i,4],sep="");
61 system(paste("wget -O",fout, y[i,3]));
62 tl = tail(unlist(gregexpr("\\.",fout)),n=1);
63 name = substr(fout,1,tl);
64 ftype = substr(fout, tl+1, 100000);
65 if(tolower(ftype) == "bam")
66 { system(paste("samtools index", fout));
67 bw=paste(name,".bw",sep="");
68 system(paste("bamCoverage --bam",fout,"-o",bw,"--binSize", binsz));
69 } else
70 { bw=fout;
71 }
72 bd=paste(name,".bed",sep="");
73 system(paste("/storage/home/yzz2/work/tools/bigWigAverageOverBed", bw, mbed, "stdout | cut -f5 >", bd));
74 system(paste("gzip -f",bd));
75 system(paste("rm ", name,".ba*",sep=""));
76 system(paste("rm ", name,".bw",sep=""));
77 }
78
79 #normalize data if inputs are available, and prepare data file list
80 fname=NULL;
81 for(i in st:ed)
82 { k=match(x[i,3],y[,3]);
83 f1=paste(folder,y[k,1],"_",y[k,2],"_",substr(y[k,4],1,nchar(y[k,4])-4),".bed.gz",sep="");
84 if(dim(x)[2]==4)
85 { k=match(x[i,4],y[,3]);
86 f0=paste(folder,y[k,1],"_",y[k,2],"_",substr(y[k,4],1,nchar(y[k,4])-4),".bed.gz",sep="");
87 if(id[i]>0)
88 { nf=paste(folder,x[i,1],".",x[i,2],"_",id[i],".norm.bed",sep="");
89 } else
90 { nf=paste(folder,x[i,1],".",x[i,2],".norm.bed",sep="");
91 }
92 x1=as.matrix(read.table(f1));
93 x0=as.matrix(read.table(f0));
94 nx=normdata(x1,x0);
95 write.table(round(nx*100)/100,nf,quote=F,row.names=F,col.names=F);
96 system(paste("gzip -f",nf));
97 fname=rbind(fname,c(x[i,1:2],paste(nf,".gz",sep="")));
98 } else
99 { nf=paste(fold,x[i,1],".",x[i,2],".bed.gz",sep="");
100 system(paste("mv", f1, nf));
101 fname=rbind(fname,c(x[i,1:2],nf));
102 }
103 }
104
105 write.table(fname,paste(folder,"tmp_",st,"_",ed,".input",sep=""),quote=F,row.names=F,col.names=F);