diff runproc.R @ 22:3651f1592f3f draft

Uploaded
author greg
date Wed, 31 Jan 2018 14:13:11 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/runproc.R	Wed Jan 31 14:13:11 2018 -0500
@@ -0,0 +1,105 @@
+normdata<-function(case, control, method="nbinom")
+{	d=dnorm(-10:10,0,10);
+	u= c(rep(0,10),control,rep(0,10));
+	l=length(case);
+	nu=rep(0,l);
+	for(i in 1:21)
+	{	nu = nu + u[i-1+1:l] * d[i];
+	}
+	nu = nu / sum(d);
+
+	t = which(case<quantile(case,prob=0.99))
+	p=mean(case[t])/var(case[t]);
+	if(p>0.9) p=0.9;
+	if(p<0.1) p=0.1;
+	n=mean(case[t])*p/(1-p);
+	print(c(n,p));
+	#nc = log2((case*20+1)/(nu/mean(nu)*mean(case)*20+1)+1)-1;
+	if(method=="nbinom")
+	{	nc = -pnbinom(case,size=c((nu+1)/mean(nu+1)*n),prob=p,lower.tail=F,log.p=T)/log(10);
+	} else
+	{	nc = -ppois(case,(nu+1)/mean(nu+1)*mean(case),lower.tail=F,log.p=T)/log(10);
+	}
+	return(nc);
+}
+
+binsz=200
+folder="me66/Input/"
+mbed="/gpfs/group/yzz2/default/IDEAS_server/data/mm10.bed"
+
+#read file list
+args<-commandArgs(trailingOnly=TRUE);
+flist=args[1];
+x=as.matrix(read.table(flist));
+if(length(args) > 1)
+{	st=as.integer(args[2]);
+	ed=as.integer(args[3]);
+} else
+{	st=1;
+	ed=dim(x)[1];
+}
+l=paste(x[,1],x[,2]);
+ll=unique(l);
+id=rep(0,length(l));
+for(i in ll)
+{	t=which(l==i);
+	if(length(t)>1)
+	{	id[t]=1:length(t);
+	}
+}
+
+y=x[st:ed,1:3];
+if(dim(x)[2]==4)
+{ y=rbind(y,x[st:ed,c(1:2,4)]); }
+t=match(unique(y[,3]),y[,3]);
+k=rapply(as.list(y[t,3]),function(z){tail(unlist(gregexpr("\\/",z)),n=1);});
+y=cbind(y[t,],substr(y[t,3],k+1,1000));
+
+#fetch data and process data to windows mean
+for(i in 1:dim(y)[1])
+{	fout = paste(folder, y[i,1],"_",y[i,2],"_",y[i,4],sep="");	
+	system(paste("wget -O",fout, y[i,3]));
+	tl = tail(unlist(gregexpr("\\.",fout)),n=1);
+	name = substr(fout,1,tl);
+	ftype = substr(fout, tl+1, 100000);
+	if(tolower(ftype) == "bam")
+	{	system(paste("samtools index", fout));
+		bw=paste(name,".bw",sep="");
+		system(paste("bamCoverage --bam",fout,"-o",bw,"--binSize", binsz));
+	} else
+	{	bw=fout;
+	}
+	bd=paste(name,".bed",sep="");
+	system(paste("/storage/home/yzz2/work/tools/bigWigAverageOverBed", bw, mbed, "stdout | cut -f5 >", bd));
+	system(paste("gzip -f",bd));
+	system(paste("rm ", name,".ba*",sep=""));
+	system(paste("rm ", name,".bw",sep=""));
+}
+
+#normalize data if inputs are available, and prepare data file list
+fname=NULL;
+for(i in st:ed)
+{	k=match(x[i,3],y[,3]);
+	f1=paste(folder,y[k,1],"_",y[k,2],"_",substr(y[k,4],1,nchar(y[k,4])-4),".bed.gz",sep="");
+	if(dim(x)[2]==4)
+	{	k=match(x[i,4],y[,3]);
+		f0=paste(folder,y[k,1],"_",y[k,2],"_",substr(y[k,4],1,nchar(y[k,4])-4),".bed.gz",sep="");
+		if(id[i]>0) 
+		{	nf=paste(folder,x[i,1],".",x[i,2],"_",id[i],".norm.bed",sep="");
+		} else
+		{	nf=paste(folder,x[i,1],".",x[i,2],".norm.bed",sep="");
+		}	
+		x1=as.matrix(read.table(f1));
+		x0=as.matrix(read.table(f0));
+		nx=normdata(x1,x0);
+		write.table(round(nx*100)/100,nf,quote=F,row.names=F,col.names=F);
+		system(paste("gzip -f",nf));
+		fname=rbind(fname,c(x[i,1:2],paste(nf,".gz",sep="")));
+	} else
+	{	nf=paste(fold,x[i,1],".",x[i,2],".bed.gz",sep="");
+		system(paste("mv", f1, nf));
+		fname=rbind(fname,c(x[i,1:2],nf));
+	}
+}
+
+write.table(fname,paste(folder,"tmp_",st,"_",ed,".input",sep=""),quote=F,row.names=F,col.names=F);