comparison RScript.r @ 5:fb219dfdcc15 draft

Uploaded
author davidvanzessen
date Thu, 21 Aug 2014 10:44:40 -0400
parents 0748ef4d42d3
children f4ff7450ef16
comparison
equal deleted inserted replaced
4:0748ef4d42d3 5:fb219dfdcc15
15 } 15 }
16 library(gridExtra) 16 library(gridExtra)
17 if (!("ggplot2" %in% rownames(installed.packages()))) { 17 if (!("ggplot2" %in% rownames(installed.packages()))) {
18 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") 18 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/")
19 } 19 }
20 require(ggplot2) 20 library(ggplot2)
21 if (!("plyr" %in% rownames(installed.packages()))) { 21 if (!("plyr" %in% rownames(installed.packages()))) {
22 install.packages("plyr", repos="http://cran.xl-mirror.nl/") 22 install.packages("plyr", repos="http://cran.xl-mirror.nl/")
23 } 23 }
24 require(plyr) 24 library(plyr)
25 25
26 if (!("data.table" %in% rownames(installed.packages()))) { 26 if (!("data.table" %in% rownames(installed.packages()))) {
27 install.packages("data.table", repos="http://cran.xl-mirror.nl/") 27 install.packages("data.table", repos="http://cran.xl-mirror.nl/")
28 } 28 }
29 library(data.table) 29 library(data.table)
36 36
37 test = read.table(inFile, sep="\t", header=TRUE, fill=T, comment.char="") 37 test = read.table(inFile, sep="\t", header=TRUE, fill=T, comment.char="")
38 38
39 test = test[test$Sample != "",] 39 test = test[test$Sample != "",]
40 40
41 41 print("test1\n")
42 42
43 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) 43 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene)
44 test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene) 44 test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene)
45 test$Top.J.Gene = gsub("[*]([0-9]+)", "", test$Top.J.Gene) 45 test$Top.J.Gene = gsub("[*]([0-9]+)", "", test$Top.J.Gene)
46 46
55 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ] 55 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ]
56 56
57 #PRODF = PROD[ -1] 57 #PRODF = PROD[ -1]
58 58
59 PRODF = PROD 59 PRODF = PROD
60 60 print("test2\n")
61 #PRODF = unique(PRODF) 61 #PRODF = unique(PRODF)
62 if(any(grepl(pattern="_", x=PRODF$ID))){ 62 if(any(grepl(pattern="_", x=PRODF$ID))){ #dumb and way to simple
63
64 PRODF$freq = gsub("^[0-9]+_", "", PRODF$ID) 63 PRODF$freq = gsub("^[0-9]+_", "", PRODF$ID)
65 PRODF$freq = gsub("_.*", "", PRODF$freq) 64 PRODF$freq = gsub("_.*", "", PRODF$freq)
66 PRODF$freq = as.numeric(PRODF$freq) 65 PRODF$freq = as.numeric(PRODF$freq)
66 if(any(is.na(PRODF$freq))){ #fix the dumbness if it fails
67 PRODF$freq = 1
68 if(selection == "unique"){
69 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ]
70 }
71 }
67 } else { 72 } else {
68 PRODF$freq = 1 73 PRODF$freq = 1
69 if(selection == "unique"){ 74 if(selection == "unique"){
70 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] 75 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ]
71 } 76 }
93 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) 98 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total))
94 99
95 V = c("v.name\tchr.orderV\n") 100 V = c("v.name\tchr.orderV\n")
96 D = c("v.name\tchr.orderD\n") 101 D = c("v.name\tchr.orderD\n")
97 J = c("v.name\tchr.orderJ\n") 102 J = c("v.name\tchr.orderJ\n")
103
104 print("test3\n")
98 105
99 if(species == "human"){ 106 if(species == "human"){
100 if(locus == "trb"){ 107 if(locus == "trb"){
101 V = c("v.name\tchr.orderV\nTRBV2\t1\nTRBV3-1\t2\nTRBV4-1\t3\nTRBV5-1\t4\nTRBV6-1\t5\nTRBV4-2\t6\nTRBV6-2\t7\nTRBV4-3\t8\nTRBV6-3\t9\nTRBV7-2\t10\nTRBV6-4\t11\nTRBV7-3\t12\nTRBV9\t13\nTRBV10-1\t14\nTRBV11-1\t15\nTRBV10-2\t16\nTRBV11-2\t17\nTRBV6-5\t18\nTRBV7-4\t19\nTRBV5-4\t20\nTRBV6-6\t21\nTRBV5-5\t22\nTRBV7-6\t23\nTRBV5-6\t24\nTRBV6-8\t25\nTRBV7-7\t26\nTRBV6-9\t27\nTRBV7-8\t28\nTRBV5-8\t29\nTRBV7-9\t30\nTRBV13\t31\nTRBV10-3\t32\nTRBV11-3\t33\nTRBV12-3\t34\nTRBV12-4\t35\nTRBV12-5\t36\nTRBV14\t37\nTRBV15\t38\nTRBV16\t39\nTRBV18\t40\nTRBV19\t41\nTRBV20-1\t42\nTRBV24-1\t43\nTRBV25-1\t44\nTRBV27\t45\nTRBV28\t46\nTRBV29-1\t47\nTRBV30\t48") 108 V = c("v.name\tchr.orderV\nTRBV2\t1\nTRBV3-1\t2\nTRBV4-1\t3\nTRBV5-1\t4\nTRBV6-1\t5\nTRBV4-2\t6\nTRBV6-2\t7\nTRBV4-3\t8\nTRBV6-3\t9\nTRBV7-2\t10\nTRBV6-4\t11\nTRBV7-3\t12\nTRBV9\t13\nTRBV10-1\t14\nTRBV11-1\t15\nTRBV10-2\t16\nTRBV11-2\t17\nTRBV6-5\t18\nTRBV7-4\t19\nTRBV5-4\t20\nTRBV6-6\t21\nTRBV5-5\t22\nTRBV7-6\t23\nTRBV5-6\t24\nTRBV6-8\t25\nTRBV7-7\t26\nTRBV6-9\t27\nTRBV7-8\t28\nTRBV5-8\t29\nTRBV7-9\t30\nTRBV13\t31\nTRBV10-3\t32\nTRBV11-3\t33\nTRBV12-3\t34\nTRBV12-4\t35\nTRBV12-5\t36\nTRBV14\t37\nTRBV15\t38\nTRBV16\t39\nTRBV18\t40\nTRBV19\t41\nTRBV20-1\t42\nTRBV24-1\t43\nTRBV25-1\t44\nTRBV27\t45\nTRBV28\t46\nTRBV29-1\t47\nTRBV30\t48")
102 D = c("v.name\tchr.orderD\nTRBD1\t1\nTRBD2\t2\n") 109 D = c("v.name\tchr.orderD\nTRBD1\t1\nTRBD2\t2\n")
129 if(species == "human" && locus == "tra"){ 136 if(species == "human" && locus == "tra"){
130 useD = FALSE 137 useD = FALSE
131 cat("No D Genes in this species/locus") 138 cat("No D Genes in this species/locus")
132 } 139 }
133 140
141 print("test4\n")
134 142
135 tcV = textConnection(V) 143 tcV = textConnection(V)
136 Vchain = read.table(tcV, sep="\t", header=TRUE) 144 Vchain = read.table(tcV, sep="\t", header=TRUE)
137 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE) 145 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE)
138 close(tcV) 146 close(tcV)
178 write.table(x=PRODFJ, file="JFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) 186 write.table(x=PRODFJ, file="JFrequency.csv", sep=",",quote=F,row.names=F,col.names=T)
179 187
180 png("JPlot.png",width = 800, height = 600) 188 png("JPlot.png",width = 800, height = 600)
181 pJ 189 pJ
182 dev.off(); 190 dev.off();
191
192 print("test5\n")
183 193
184 VGenes = PRODF[,c("Sample", "Top.V.Gene", "freq")] 194 VGenes = PRODF[,c("Sample", "Top.V.Gene", "freq")]
185 VGenes$Top.V.Gene = gsub("-.*", "", VGenes$Top.V.Gene) 195 VGenes$Top.V.Gene = gsub("-.*", "", VGenes$Top.V.Gene)
186 VGenes = data.frame(data.table(VGenes)[, list(Count=sum(freq)), by=c("Sample", "Top.V.Gene")]) 196 VGenes = data.frame(data.table(VGenes)[, list(Count=sum(freq)), by=c("Sample", "Top.V.Gene")])
187 TotalPerSample = data.frame(data.table(VGenes)[, list(total=sum(.SD$Count)), by=Sample]) 197 TotalPerSample = data.frame(data.table(VGenes)[, list(total=sum(.SD$Count)), by=Sample])
243 revVchain = Vchain 253 revVchain = Vchain
244 revDchain = Dchain 254 revDchain = Dchain
245 revVchain$chr.orderV = rev(revVchain$chr.orderV) 255 revVchain$chr.orderV = rev(revVchain$chr.orderV)
246 revDchain$chr.orderD = rev(revDchain$chr.orderD) 256 revDchain$chr.orderD = rev(revDchain$chr.orderD)
247 257
258 print("test6\n")
248 259
249 plotVD <- function(dat){ 260 plotVD <- function(dat){
250 if(length(dat[,1]) == 0){ 261 if(length(dat[,1]) == 0){
251 return() 262 return()
252 } 263 }
351 un = unique(test$Sample) 362 un = unique(test$Sample)
352 un = paste(un, sep="\n") 363 un = paste(un, sep="\n")
353 writeLines(un, sampleFile) 364 writeLines(un, sampleFile)
354 close(sampleFile) 365 close(sampleFile)
355 366
367 print("test7\n")
356 368
357 if("Replicate" %in% colnames(test)) 369 if("Replicate" %in% colnames(test))
358 { 370 {
359 clonalityFrame = PROD 371 clonalityFrame = PROD
360 clonalityFrame$ReplicateConcat = do.call(paste, c(clonalityFrame[c("VDJCDR3", "Sample", "Replicate")], sep = ":")) 372 clonalityFrame$ReplicateConcat = do.call(paste, c(clonalityFrame[c("VDJCDR3", "Sample", "Replicate")], sep = ":"))
436 } 448 }
437 449
438 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) 450 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample)
439 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) 451 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint)
440 } 452 }
453
454 print("test8\n")
441 455
442 if("Functionality" %in% colnames(test)) 456 if("Functionality" %in% colnames(test))
443 { 457 {
444 newData = data.frame(data.table(PROD)[,list(unique=.N, 458 newData = data.frame(data.table(PROD)[,list(unique=.N,
445 VH.DEL=mean(X3V.REGION.trimmed.nt.nb), 459 VH.DEL=mean(X3V.REGION.trimmed.nt.nb),
465 mean(P3D.nt.nb) + 479 mean(P3D.nt.nb) +
466 mean(P5J.nt.nb))), 480 mean(P5J.nt.nb))),
467 by=c("Sample")]) 481 by=c("Sample")])
468 write.table(newData, "junctionAnalysis.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) 482 write.table(newData, "junctionAnalysis.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F)
469 } 483 }
484
485 print("test9\n")