annotate runproc.R @ 22:3651f1592f3f draft

Uploaded
author greg
date Wed, 31 Jan 2018 14:13:11 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
22
3651f1592f3f Uploaded
greg
parents:
diff changeset
1 normdata<-function(case, control, method="nbinom")
3651f1592f3f Uploaded
greg
parents:
diff changeset
2 { d=dnorm(-10:10,0,10);
3651f1592f3f Uploaded
greg
parents:
diff changeset
3 u= c(rep(0,10),control,rep(0,10));
3651f1592f3f Uploaded
greg
parents:
diff changeset
4 l=length(case);
3651f1592f3f Uploaded
greg
parents:
diff changeset
5 nu=rep(0,l);
3651f1592f3f Uploaded
greg
parents:
diff changeset
6 for(i in 1:21)
3651f1592f3f Uploaded
greg
parents:
diff changeset
7 { nu = nu + u[i-1+1:l] * d[i];
3651f1592f3f Uploaded
greg
parents:
diff changeset
8 }
3651f1592f3f Uploaded
greg
parents:
diff changeset
9 nu = nu / sum(d);
3651f1592f3f Uploaded
greg
parents:
diff changeset
10
3651f1592f3f Uploaded
greg
parents:
diff changeset
11 t = which(case<quantile(case,prob=0.99))
3651f1592f3f Uploaded
greg
parents:
diff changeset
12 p=mean(case[t])/var(case[t]);
3651f1592f3f Uploaded
greg
parents:
diff changeset
13 if(p>0.9) p=0.9;
3651f1592f3f Uploaded
greg
parents:
diff changeset
14 if(p<0.1) p=0.1;
3651f1592f3f Uploaded
greg
parents:
diff changeset
15 n=mean(case[t])*p/(1-p);
3651f1592f3f Uploaded
greg
parents:
diff changeset
16 print(c(n,p));
3651f1592f3f Uploaded
greg
parents:
diff changeset
17 #nc = log2((case*20+1)/(nu/mean(nu)*mean(case)*20+1)+1)-1;
3651f1592f3f Uploaded
greg
parents:
diff changeset
18 if(method=="nbinom")
3651f1592f3f Uploaded
greg
parents:
diff changeset
19 { nc = -pnbinom(case,size=c((nu+1)/mean(nu+1)*n),prob=p,lower.tail=F,log.p=T)/log(10);
3651f1592f3f Uploaded
greg
parents:
diff changeset
20 } else
3651f1592f3f Uploaded
greg
parents:
diff changeset
21 { nc = -ppois(case,(nu+1)/mean(nu+1)*mean(case),lower.tail=F,log.p=T)/log(10);
3651f1592f3f Uploaded
greg
parents:
diff changeset
22 }
3651f1592f3f Uploaded
greg
parents:
diff changeset
23 return(nc);
3651f1592f3f Uploaded
greg
parents:
diff changeset
24 }
3651f1592f3f Uploaded
greg
parents:
diff changeset
25
3651f1592f3f Uploaded
greg
parents:
diff changeset
26 binsz=200
3651f1592f3f Uploaded
greg
parents:
diff changeset
27 folder="me66/Input/"
3651f1592f3f Uploaded
greg
parents:
diff changeset
28 mbed="/gpfs/group/yzz2/default/IDEAS_server/data/mm10.bed"
3651f1592f3f Uploaded
greg
parents:
diff changeset
29
3651f1592f3f Uploaded
greg
parents:
diff changeset
30 #read file list
3651f1592f3f Uploaded
greg
parents:
diff changeset
31 args<-commandArgs(trailingOnly=TRUE);
3651f1592f3f Uploaded
greg
parents:
diff changeset
32 flist=args[1];
3651f1592f3f Uploaded
greg
parents:
diff changeset
33 x=as.matrix(read.table(flist));
3651f1592f3f Uploaded
greg
parents:
diff changeset
34 if(length(args) > 1)
3651f1592f3f Uploaded
greg
parents:
diff changeset
35 { st=as.integer(args[2]);
3651f1592f3f Uploaded
greg
parents:
diff changeset
36 ed=as.integer(args[3]);
3651f1592f3f Uploaded
greg
parents:
diff changeset
37 } else
3651f1592f3f Uploaded
greg
parents:
diff changeset
38 { st=1;
3651f1592f3f Uploaded
greg
parents:
diff changeset
39 ed=dim(x)[1];
3651f1592f3f Uploaded
greg
parents:
diff changeset
40 }
3651f1592f3f Uploaded
greg
parents:
diff changeset
41 l=paste(x[,1],x[,2]);
3651f1592f3f Uploaded
greg
parents:
diff changeset
42 ll=unique(l);
3651f1592f3f Uploaded
greg
parents:
diff changeset
43 id=rep(0,length(l));
3651f1592f3f Uploaded
greg
parents:
diff changeset
44 for(i in ll)
3651f1592f3f Uploaded
greg
parents:
diff changeset
45 { t=which(l==i);
3651f1592f3f Uploaded
greg
parents:
diff changeset
46 if(length(t)>1)
3651f1592f3f Uploaded
greg
parents:
diff changeset
47 { id[t]=1:length(t);
3651f1592f3f Uploaded
greg
parents:
diff changeset
48 }
3651f1592f3f Uploaded
greg
parents:
diff changeset
49 }
3651f1592f3f Uploaded
greg
parents:
diff changeset
50
3651f1592f3f Uploaded
greg
parents:
diff changeset
51 y=x[st:ed,1:3];
3651f1592f3f Uploaded
greg
parents:
diff changeset
52 if(dim(x)[2]==4)
3651f1592f3f Uploaded
greg
parents:
diff changeset
53 { y=rbind(y,x[st:ed,c(1:2,4)]); }
3651f1592f3f Uploaded
greg
parents:
diff changeset
54 t=match(unique(y[,3]),y[,3]);
3651f1592f3f Uploaded
greg
parents:
diff changeset
55 k=rapply(as.list(y[t,3]),function(z){tail(unlist(gregexpr("\\/",z)),n=1);});
3651f1592f3f Uploaded
greg
parents:
diff changeset
56 y=cbind(y[t,],substr(y[t,3],k+1,1000));
3651f1592f3f Uploaded
greg
parents:
diff changeset
57
3651f1592f3f Uploaded
greg
parents:
diff changeset
58 #fetch data and process data to windows mean
3651f1592f3f Uploaded
greg
parents:
diff changeset
59 for(i in 1:dim(y)[1])
3651f1592f3f Uploaded
greg
parents:
diff changeset
60 { fout = paste(folder, y[i,1],"_",y[i,2],"_",y[i,4],sep="");
3651f1592f3f Uploaded
greg
parents:
diff changeset
61 system(paste("wget -O",fout, y[i,3]));
3651f1592f3f Uploaded
greg
parents:
diff changeset
62 tl = tail(unlist(gregexpr("\\.",fout)),n=1);
3651f1592f3f Uploaded
greg
parents:
diff changeset
63 name = substr(fout,1,tl);
3651f1592f3f Uploaded
greg
parents:
diff changeset
64 ftype = substr(fout, tl+1, 100000);
3651f1592f3f Uploaded
greg
parents:
diff changeset
65 if(tolower(ftype) == "bam")
3651f1592f3f Uploaded
greg
parents:
diff changeset
66 { system(paste("samtools index", fout));
3651f1592f3f Uploaded
greg
parents:
diff changeset
67 bw=paste(name,".bw",sep="");
3651f1592f3f Uploaded
greg
parents:
diff changeset
68 system(paste("bamCoverage --bam",fout,"-o",bw,"--binSize", binsz));
3651f1592f3f Uploaded
greg
parents:
diff changeset
69 } else
3651f1592f3f Uploaded
greg
parents:
diff changeset
70 { bw=fout;
3651f1592f3f Uploaded
greg
parents:
diff changeset
71 }
3651f1592f3f Uploaded
greg
parents:
diff changeset
72 bd=paste(name,".bed",sep="");
3651f1592f3f Uploaded
greg
parents:
diff changeset
73 system(paste("/storage/home/yzz2/work/tools/bigWigAverageOverBed", bw, mbed, "stdout | cut -f5 >", bd));
3651f1592f3f Uploaded
greg
parents:
diff changeset
74 system(paste("gzip -f",bd));
3651f1592f3f Uploaded
greg
parents:
diff changeset
75 system(paste("rm ", name,".ba*",sep=""));
3651f1592f3f Uploaded
greg
parents:
diff changeset
76 system(paste("rm ", name,".bw",sep=""));
3651f1592f3f Uploaded
greg
parents:
diff changeset
77 }
3651f1592f3f Uploaded
greg
parents:
diff changeset
78
3651f1592f3f Uploaded
greg
parents:
diff changeset
79 #normalize data if inputs are available, and prepare data file list
3651f1592f3f Uploaded
greg
parents:
diff changeset
80 fname=NULL;
3651f1592f3f Uploaded
greg
parents:
diff changeset
81 for(i in st:ed)
3651f1592f3f Uploaded
greg
parents:
diff changeset
82 { k=match(x[i,3],y[,3]);
3651f1592f3f Uploaded
greg
parents:
diff changeset
83 f1=paste(folder,y[k,1],"_",y[k,2],"_",substr(y[k,4],1,nchar(y[k,4])-4),".bed.gz",sep="");
3651f1592f3f Uploaded
greg
parents:
diff changeset
84 if(dim(x)[2]==4)
3651f1592f3f Uploaded
greg
parents:
diff changeset
85 { k=match(x[i,4],y[,3]);
3651f1592f3f Uploaded
greg
parents:
diff changeset
86 f0=paste(folder,y[k,1],"_",y[k,2],"_",substr(y[k,4],1,nchar(y[k,4])-4),".bed.gz",sep="");
3651f1592f3f Uploaded
greg
parents:
diff changeset
87 if(id[i]>0)
3651f1592f3f Uploaded
greg
parents:
diff changeset
88 { nf=paste(folder,x[i,1],".",x[i,2],"_",id[i],".norm.bed",sep="");
3651f1592f3f Uploaded
greg
parents:
diff changeset
89 } else
3651f1592f3f Uploaded
greg
parents:
diff changeset
90 { nf=paste(folder,x[i,1],".",x[i,2],".norm.bed",sep="");
3651f1592f3f Uploaded
greg
parents:
diff changeset
91 }
3651f1592f3f Uploaded
greg
parents:
diff changeset
92 x1=as.matrix(read.table(f1));
3651f1592f3f Uploaded
greg
parents:
diff changeset
93 x0=as.matrix(read.table(f0));
3651f1592f3f Uploaded
greg
parents:
diff changeset
94 nx=normdata(x1,x0);
3651f1592f3f Uploaded
greg
parents:
diff changeset
95 write.table(round(nx*100)/100,nf,quote=F,row.names=F,col.names=F);
3651f1592f3f Uploaded
greg
parents:
diff changeset
96 system(paste("gzip -f",nf));
3651f1592f3f Uploaded
greg
parents:
diff changeset
97 fname=rbind(fname,c(x[i,1:2],paste(nf,".gz",sep="")));
3651f1592f3f Uploaded
greg
parents:
diff changeset
98 } else
3651f1592f3f Uploaded
greg
parents:
diff changeset
99 { nf=paste(fold,x[i,1],".",x[i,2],".bed.gz",sep="");
3651f1592f3f Uploaded
greg
parents:
diff changeset
100 system(paste("mv", f1, nf));
3651f1592f3f Uploaded
greg
parents:
diff changeset
101 fname=rbind(fname,c(x[i,1:2],nf));
3651f1592f3f Uploaded
greg
parents:
diff changeset
102 }
3651f1592f3f Uploaded
greg
parents:
diff changeset
103 }
3651f1592f3f Uploaded
greg
parents:
diff changeset
104
3651f1592f3f Uploaded
greg
parents:
diff changeset
105 write.table(fname,paste(folder,"tmp_",st,"_",ed,".input",sep=""),quote=F,row.names=F,col.names=F);