comparison RScript.r @ 6:f4ff7450ef16 draft

Uploaded
author davidvanzessen
date Fri, 22 Aug 2014 11:02:20 -0400
parents fb219dfdcc15
children
comparison
equal deleted inserted replaced
5:fb219dfdcc15 6:f4ff7450ef16
375 375
376 ClonalitySampleReplicatePrint <- function(dat){ 376 ClonalitySampleReplicatePrint <- function(dat){
377 write.table(dat, paste("clonality_", unique(dat$Sample) , "_", unique(dat$Replicate), ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) 377 write.table(dat, paste("clonality_", unique(dat$Sample) , "_", unique(dat$Replicate), ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)
378 } 378 }
379 379
380 clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,c("Sample", "Replicate")]) 380 clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,c("Sample", "Replicate")])
381 #lapply(clonalityFrameSplit, FUN=ClonalitySampleReplicatePrint) 381 #lapply(clonalityFrameSplit, FUN=ClonalitySampleReplicatePrint)
382 382
383 ClonalitySamplePrint <- function(dat){ 383 ClonalitySamplePrint <- function(dat){
384 write.table(dat, paste("clonality_", unique(dat$Sample) , ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) 384 write.table(dat, paste("clonality_", unique(dat$Sample) , ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)
385 } 385 }
386 386
387 clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,"Sample"]) 387 clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,"Sample"])
388 #lapply(clonalityFrameSplit, FUN=ClonalitySamplePrint) 388 #lapply(clonalityFrameSplit, FUN=ClonalitySamplePrint)
389 389
390 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "VDJCDR3")]) 390 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "VDJCDR3")])
391 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) 391 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")])
392 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count 392 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count
393 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) 393 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")])
420 write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) 420 write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
421 } 421 }
422 422
423 ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) 423 ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])
424 lapply(ReplicateSumSplit, FUN=ReplicateSumPrint) 424 lapply(ReplicateSumSplit, FUN=ReplicateSumPrint)
425
426 writeClonalitySequences <- function(dat){
427 for(i in c(2,3,4,5,6)){
428 fltr = dat[dat$Type == i,]
429 if(length(fltr[,1]) == 0){
430 next
431 }
432 tmp = clonalityFrame[clonalityFrame$Sample == fltr$Sample[1] & clonalityFrame$VDJCDR3 %in% fltr$VDJCDR3,]
433 tmp = tmp[order(tmp$VDJCDR3),]
434 write.table(tmp, paste("ClonalitySequences_", unique(dat[1])[1,1] , "_", i, ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=T)
435 }
436 }
437 freqsplt = split(clonalFreq[clonalFreq$Type > 1,], clonalFreq[clonalFreq$Type > 1,]$Sample)
438 lapply(freqsplt, FUN=writeClonalitySequences)
425 439
426 clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")]) 440 clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")])
427 clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T) 441 clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T)
428 clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow 442 clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow
429 clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2) 443 clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2)