comparison RScript.r @ 9:7dbc9ebcefce draft

Uploaded
author davidvanzessen
date Mon, 12 Jan 2015 05:35:42 -0500
parents 043fd6613fd9
children b8db36cfe6ad
comparison
equal deleted inserted replaced
8:043fd6613fd9 9:7dbc9ebcefce
48 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) 48 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene)
49 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) 49 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene)
50 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) 50 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene)
51 inputdata$clonaltype = 1:nrow(inputdata) 51 inputdata$clonaltype = 1:nrow(inputdata)
52 PRODF = inputdata 52 PRODF = inputdata
53 UNPROD = inputdata
53 if(filterproductive){ 54 if(filterproductive){
54 if("Functionality" %in% colnames(inputdata)) { # "Functionality" is an IMGT column 55 if("Functionality" %in% colnames(inputdata)) { # "Functionality" is an IMGT column
55 PRODF = inputdata[inputdata$Functionality == "productive" | inputdata$Functionality == "productive (see comment)", ] 56 PRODF = inputdata[inputdata$Functionality == "productive" | inputdata$Functionality == "productive (see comment)", ]
57 UNPROD = inputdata[!(inputdata$Functionality == "productive" | inputdata$Functionality == "productive (see comment)"), ]
56 } else { 58 } else {
57 PRODF = inputdata[inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" , ] 59 PRODF = inputdata[inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" , ]
60 UNPROD = inputdata[!(inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" ), ]
58 } 61 }
59 } 62 }
60 63
61 #remove duplicates based on the clonaltype 64 #remove duplicates based on the clonaltype
62 if(clonaltype != "none"){ 65 if(clonaltype != "none"){
63 PRODF$clonaltype = do.call(paste, c(PRODF[unlist(strsplit(clonaltype, ","))], sep = ":")) 66 PRODF$clonaltype = do.call(paste, c(PRODF[unlist(strsplit(clonaltype, ","))], sep = ":"))
64 PRODF = PRODF[!duplicated(PRODF$clonaltype), ] 67 PRODF = PRODF[!duplicated(PRODF$clonaltype), ]
68 UNPROD$clonaltype = do.call(paste, c(UNPROD[unlist(strsplit(clonaltype, ","))], sep = ":"))
69 UNPROD = UNPROD[!duplicated(UNPROD$clonaltype), ]
65 } 70 }
66 71
67 PRODF$freq = 1 72 PRODF$freq = 1
68 73
69 if(any(grepl(pattern="_", x=PRODF$ID))){ #the frequency can be stored in the ID with the pattern ".*_freq_.*" 74 if(any(grepl(pattern="_", x=PRODF$ID))){ #the frequency can be stored in the ID with the pattern ".*_freq_.*"
574 Total.P=( mean(P3V.nt.nb, na.rm=T) + 579 Total.P=( mean(P3V.nt.nb, na.rm=T) +
575 mean(P5D.nt.nb, na.rm=T) + 580 mean(P5D.nt.nb, na.rm=T) +
576 mean(P3D.nt.nb, na.rm=T) + 581 mean(P3D.nt.nb, na.rm=T) +
577 mean(P5J.nt.nb, na.rm=T))), 582 mean(P5J.nt.nb, na.rm=T))),
578 by=c("Sample")]) 583 by=c("Sample")])
579 write.table(newData, "junctionAnalysis.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 584 write.table(newData, "junctionAnalysisProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
580 } 585
586 newData = data.frame(data.table(UNPROD)[,list(unique=.N,
587 VH.DEL=mean(X3V.REGION.trimmed.nt.nb, na.rm=T),
588 P1=mean(P3V.nt.nb, na.rm=T),
589 N1=mean(N1.REGION.nt.nb, na.rm=T),
590 P2=mean(P5D.nt.nb, na.rm=T),
591 DEL.DH=mean(X5D.REGION.trimmed.nt.nb, na.rm=T),
592 DH.DEL=mean(X3D.REGION.trimmed.nt.nb, na.rm=T),
593 P3=mean(P3D.nt.nb, na.rm=T),
594 N2=mean(N2.REGION.nt.nb, na.rm=T),
595 P4=mean(P5J.nt.nb, na.rm=T),
596 DEL.JH=mean(X5J.REGION.trimmed.nt.nb, na.rm=T),
597 Total.Del=( mean(X3V.REGION.trimmed.nt.nb, na.rm=T) +
598 mean(X5D.REGION.trimmed.nt.nb, na.rm=T) +
599 mean(X3D.REGION.trimmed.nt.nb, na.rm=T) +
600 mean(X5J.REGION.trimmed.nt.nb, na.rm=T)),
601
602 Total.N=( mean(N1.REGION.nt.nb, na.rm=T) +
603 mean(N2.REGION.nt.nb, na.rm=T)),
604
605 Total.P=( mean(P3V.nt.nb, na.rm=T) +
606 mean(P5D.nt.nb, na.rm=T) +
607 mean(P3D.nt.nb, na.rm=T) +
608 mean(P5J.nt.nb, na.rm=T))),
609 by=c("Sample")])
610 write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
611 }