comparison RScript.r @ 4:8ba0fd5b03a1 draft

Uploaded
author davidvanzessen
date Tue, 10 Dec 2013 05:52:37 -0500
parents e1aa99d86a8a
children 3ce07f5889ad
comparison
equal deleted inserted replaced
3:e1aa99d86a8a 4:8ba0fd5b03a1
36 36
37 #test$VDJCDR3 = do.call(paste, c(test[c("Top.V.Gene", "Top.D.Gene", "Top.J.Gene","CDR3.Seq.DNA")], sep = ":")) 37 #test$VDJCDR3 = do.call(paste, c(test[c("Top.V.Gene", "Top.D.Gene", "Top.J.Gene","CDR3.Seq.DNA")], sep = ":"))
38 test$VDJCDR3 = do.call(paste, c(test[unlist(strsplit(clonalType, ","))], sep = ":")) 38 test$VDJCDR3 = do.call(paste, c(test[unlist(strsplit(clonalType, ","))], sep = ":"))
39 39
40 PROD = test[test$VDJ.Frame != "In-frame with stop codon" & test$VDJ.Frame != "Out-of-frame" & test$CDR3.Found.How != "NOT_FOUND" , ] 40 PROD = test[test$VDJ.Frame != "In-frame with stop codon" & test$VDJ.Frame != "Out-of-frame" & test$CDR3.Found.How != "NOT_FOUND" , ]
41 if("Functionality" %in% colnames(test)) {
42 PROD = test[test$Functionality == "productive" | test$Functionality == "productive (see comment)", ]
43 }
41 44
42 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ] 45 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ]
43 46
44 #PRODF = PROD[ -1] 47 #PRODF = PROD[ -1]
45 48
87 Jchain = read.table(tcJ, sep="\t", header=TRUE) 90 Jchain = read.table(tcJ, sep="\t", header=TRUE)
88 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) 91 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE)
89 close(tcJ) 92 close(tcJ)
90 93
91 setwd(outDir) 94 setwd(outDir)
95
96 write.table(PRODF, "allUnique.tsv", sep="\t",quote=F,row.names=T,col.names=T)
92 97
93 pV = ggplot(PRODFV) 98 pV = ggplot(PRODFV)
94 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 99 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
95 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") 100 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage")
96 101
229 if("Replicate" %in% colnames(test)) 234 if("Replicate" %in% colnames(test))
230 { 235 {
231 clonalityFrame = PROD 236 clonalityFrame = PROD
232 clonalityFrame$ReplicateConcat = do.call(paste, c(clonalityFrame[c("VDJCDR3", "Sample", "Replicate")], sep = ":")) 237 clonalityFrame$ReplicateConcat = do.call(paste, c(clonalityFrame[c("VDJCDR3", "Sample", "Replicate")], sep = ":"))
233 clonalityFrame = clonalityFrame[!duplicated(clonalityFrame$ReplicateConcat), ] 238 clonalityFrame = clonalityFrame[!duplicated(clonalityFrame$ReplicateConcat), ]
234
235 write.table(clonalityFrame, "clonalityComplete.tsv", sep="\t",quote=F,row.names=T,col.names=T) 239 write.table(clonalityFrame, "clonalityComplete.tsv", sep="\t",quote=F,row.names=T,col.names=T)
236 240
237 ClonalitySampleReplicatePrint <- function(dat){ 241 ClonalitySampleReplicatePrint <- function(dat){
238 write.table(dat, paste("clonality_", unique(dat$Sample) , "_", unique(dat$Replicate), ".tsv", sep=""), sep="\t",quote=F,row.names=T,col.names=T) 242 write.table(dat, paste("clonality_", unique(dat$Sample) , "_", unique(dat$Replicate), ".tsv", sep=""), sep="\t",quote=F,row.names=T,col.names=T)
239 } 243 }
245 write.table(dat, paste("clonality_", unique(dat$Sample) , ".tsv", sep=""), sep="\t",quote=F,row.names=T,col.names=T) 249 write.table(dat, paste("clonality_", unique(dat$Sample) , ".tsv", sep=""), sep="\t",quote=F,row.names=T,col.names=T)
246 } 250 }
247 251
248 clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,"Sample"]) 252 clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,"Sample"])
249 lapply(clonalityFrameSplit, FUN=ClonalitySamplePrint) 253 lapply(clonalityFrameSplit, FUN=ClonalitySamplePrint)
250 254
251 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "VDJCDR3")]) 255 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "VDJCDR3")])
252 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) 256 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")])
253 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count 257 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count
254 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) 258 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")])
255 clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample") 259 clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample")