Mercurial > repos > greg > ideas_preprocessor
changeset 23:6306f535a658 draft
Deleted selected files
author | greg |
---|---|
date | Thu, 01 Feb 2018 11:22:09 -0500 |
parents | 3651f1592f3f |
children | 71345e154c66 |
files | runproc.R |
diffstat | 1 files changed, 0 insertions(+), 105 deletions(-) [+] |
line wrap: on
line diff
--- a/runproc.R Wed Jan 31 14:13:11 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,105 +0,0 @@ -normdata<-function(case, control, method="nbinom") -{ d=dnorm(-10:10,0,10); - u= c(rep(0,10),control,rep(0,10)); - l=length(case); - nu=rep(0,l); - for(i in 1:21) - { nu = nu + u[i-1+1:l] * d[i]; - } - nu = nu / sum(d); - - t = which(case<quantile(case,prob=0.99)) - p=mean(case[t])/var(case[t]); - if(p>0.9) p=0.9; - if(p<0.1) p=0.1; - n=mean(case[t])*p/(1-p); - print(c(n,p)); - #nc = log2((case*20+1)/(nu/mean(nu)*mean(case)*20+1)+1)-1; - if(method=="nbinom") - { nc = -pnbinom(case,size=c((nu+1)/mean(nu+1)*n),prob=p,lower.tail=F,log.p=T)/log(10); - } else - { nc = -ppois(case,(nu+1)/mean(nu+1)*mean(case),lower.tail=F,log.p=T)/log(10); - } - return(nc); -} - -binsz=200 -folder="me66/Input/" -mbed="/gpfs/group/yzz2/default/IDEAS_server/data/mm10.bed" - -#read file list -args<-commandArgs(trailingOnly=TRUE); -flist=args[1]; -x=as.matrix(read.table(flist)); -if(length(args) > 1) -{ st=as.integer(args[2]); - ed=as.integer(args[3]); -} else -{ st=1; - ed=dim(x)[1]; -} -l=paste(x[,1],x[,2]); -ll=unique(l); -id=rep(0,length(l)); -for(i in ll) -{ t=which(l==i); - if(length(t)>1) - { id[t]=1:length(t); - } -} - -y=x[st:ed,1:3]; -if(dim(x)[2]==4) -{ y=rbind(y,x[st:ed,c(1:2,4)]); } -t=match(unique(y[,3]),y[,3]); -k=rapply(as.list(y[t,3]),function(z){tail(unlist(gregexpr("\\/",z)),n=1);}); -y=cbind(y[t,],substr(y[t,3],k+1,1000)); - -#fetch data and process data to windows mean -for(i in 1:dim(y)[1]) -{ fout = paste(folder, y[i,1],"_",y[i,2],"_",y[i,4],sep=""); - system(paste("wget -O",fout, y[i,3])); - tl = tail(unlist(gregexpr("\\.",fout)),n=1); - name = substr(fout,1,tl); - ftype = substr(fout, tl+1, 100000); - if(tolower(ftype) == "bam") - { system(paste("samtools index", fout)); - bw=paste(name,".bw",sep=""); - system(paste("bamCoverage --bam",fout,"-o",bw,"--binSize", binsz)); - } else - { bw=fout; - } - bd=paste(name,".bed",sep=""); - system(paste("/storage/home/yzz2/work/tools/bigWigAverageOverBed", bw, mbed, "stdout | cut -f5 >", bd)); - system(paste("gzip -f",bd)); - system(paste("rm ", name,".ba*",sep="")); - system(paste("rm ", name,".bw",sep="")); -} - -#normalize data if inputs are available, and prepare data file list -fname=NULL; -for(i in st:ed) -{ k=match(x[i,3],y[,3]); - f1=paste(folder,y[k,1],"_",y[k,2],"_",substr(y[k,4],1,nchar(y[k,4])-4),".bed.gz",sep=""); - if(dim(x)[2]==4) - { k=match(x[i,4],y[,3]); - f0=paste(folder,y[k,1],"_",y[k,2],"_",substr(y[k,4],1,nchar(y[k,4])-4),".bed.gz",sep=""); - if(id[i]>0) - { nf=paste(folder,x[i,1],".",x[i,2],"_",id[i],".norm.bed",sep=""); - } else - { nf=paste(folder,x[i,1],".",x[i,2],".norm.bed",sep=""); - } - x1=as.matrix(read.table(f1)); - x0=as.matrix(read.table(f0)); - nx=normdata(x1,x0); - write.table(round(nx*100)/100,nf,quote=F,row.names=F,col.names=F); - system(paste("gzip -f",nf)); - fname=rbind(fname,c(x[i,1:2],paste(nf,".gz",sep=""))); - } else - { nf=paste(fold,x[i,1],".",x[i,2],".bed.gz",sep=""); - system(paste("mv", f1, nf)); - fname=rbind(fname,c(x[i,1:2],nf)); - } -} - -write.table(fname,paste(folder,"tmp_",st,"_",ed,".input",sep=""),quote=F,row.names=F,col.names=F);