Mercurial > repos > davidvanzessen > complete_immunerepertoire_igg
comparison RScript.r @ 9:7dbc9ebcefce draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 12 Jan 2015 05:35:42 -0500 |
parents | 043fd6613fd9 |
children | b8db36cfe6ad |
comparison
equal
deleted
inserted
replaced
8:043fd6613fd9 | 9:7dbc9ebcefce |
---|---|
48 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) | 48 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) |
49 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) | 49 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) |
50 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) | 50 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) |
51 inputdata$clonaltype = 1:nrow(inputdata) | 51 inputdata$clonaltype = 1:nrow(inputdata) |
52 PRODF = inputdata | 52 PRODF = inputdata |
53 UNPROD = inputdata | |
53 if(filterproductive){ | 54 if(filterproductive){ |
54 if("Functionality" %in% colnames(inputdata)) { # "Functionality" is an IMGT column | 55 if("Functionality" %in% colnames(inputdata)) { # "Functionality" is an IMGT column |
55 PRODF = inputdata[inputdata$Functionality == "productive" | inputdata$Functionality == "productive (see comment)", ] | 56 PRODF = inputdata[inputdata$Functionality == "productive" | inputdata$Functionality == "productive (see comment)", ] |
57 UNPROD = inputdata[!(inputdata$Functionality == "productive" | inputdata$Functionality == "productive (see comment)"), ] | |
56 } else { | 58 } else { |
57 PRODF = inputdata[inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" , ] | 59 PRODF = inputdata[inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" , ] |
60 UNPROD = inputdata[!(inputdata$VDJ.Frame != "In-frame with stop codon" & inputdata$VDJ.Frame != "Out-of-frame" & inputdata$CDR3.Found.How != "NOT_FOUND" ), ] | |
58 } | 61 } |
59 } | 62 } |
60 | 63 |
61 #remove duplicates based on the clonaltype | 64 #remove duplicates based on the clonaltype |
62 if(clonaltype != "none"){ | 65 if(clonaltype != "none"){ |
63 PRODF$clonaltype = do.call(paste, c(PRODF[unlist(strsplit(clonaltype, ","))], sep = ":")) | 66 PRODF$clonaltype = do.call(paste, c(PRODF[unlist(strsplit(clonaltype, ","))], sep = ":")) |
64 PRODF = PRODF[!duplicated(PRODF$clonaltype), ] | 67 PRODF = PRODF[!duplicated(PRODF$clonaltype), ] |
68 UNPROD$clonaltype = do.call(paste, c(UNPROD[unlist(strsplit(clonaltype, ","))], sep = ":")) | |
69 UNPROD = UNPROD[!duplicated(UNPROD$clonaltype), ] | |
65 } | 70 } |
66 | 71 |
67 PRODF$freq = 1 | 72 PRODF$freq = 1 |
68 | 73 |
69 if(any(grepl(pattern="_", x=PRODF$ID))){ #the frequency can be stored in the ID with the pattern ".*_freq_.*" | 74 if(any(grepl(pattern="_", x=PRODF$ID))){ #the frequency can be stored in the ID with the pattern ".*_freq_.*" |
574 Total.P=( mean(P3V.nt.nb, na.rm=T) + | 579 Total.P=( mean(P3V.nt.nb, na.rm=T) + |
575 mean(P5D.nt.nb, na.rm=T) + | 580 mean(P5D.nt.nb, na.rm=T) + |
576 mean(P3D.nt.nb, na.rm=T) + | 581 mean(P3D.nt.nb, na.rm=T) + |
577 mean(P5J.nt.nb, na.rm=T))), | 582 mean(P5J.nt.nb, na.rm=T))), |
578 by=c("Sample")]) | 583 by=c("Sample")]) |
579 write.table(newData, "junctionAnalysis.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 584 write.table(newData, "junctionAnalysisProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
580 } | 585 |
586 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | |
587 VH.DEL=mean(X3V.REGION.trimmed.nt.nb, na.rm=T), | |
588 P1=mean(P3V.nt.nb, na.rm=T), | |
589 N1=mean(N1.REGION.nt.nb, na.rm=T), | |
590 P2=mean(P5D.nt.nb, na.rm=T), | |
591 DEL.DH=mean(X5D.REGION.trimmed.nt.nb, na.rm=T), | |
592 DH.DEL=mean(X3D.REGION.trimmed.nt.nb, na.rm=T), | |
593 P3=mean(P3D.nt.nb, na.rm=T), | |
594 N2=mean(N2.REGION.nt.nb, na.rm=T), | |
595 P4=mean(P5J.nt.nb, na.rm=T), | |
596 DEL.JH=mean(X5J.REGION.trimmed.nt.nb, na.rm=T), | |
597 Total.Del=( mean(X3V.REGION.trimmed.nt.nb, na.rm=T) + | |
598 mean(X5D.REGION.trimmed.nt.nb, na.rm=T) + | |
599 mean(X3D.REGION.trimmed.nt.nb, na.rm=T) + | |
600 mean(X5J.REGION.trimmed.nt.nb, na.rm=T)), | |
601 | |
602 Total.N=( mean(N1.REGION.nt.nb, na.rm=T) + | |
603 mean(N2.REGION.nt.nb, na.rm=T)), | |
604 | |
605 Total.P=( mean(P3V.nt.nb, na.rm=T) + | |
606 mean(P5D.nt.nb, na.rm=T) + | |
607 mean(P3D.nt.nb, na.rm=T) + | |
608 mean(P5J.nt.nb, na.rm=T))), | |
609 by=c("Sample")]) | |
610 write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | |
611 } |