Mercurial > repos > davidvanzessen > report_clonality_tcell_igg
comparison RScript.r @ 5:fb219dfdcc15 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 21 Aug 2014 10:44:40 -0400 |
parents | 0748ef4d42d3 |
children | f4ff7450ef16 |
comparison
equal
deleted
inserted
replaced
4:0748ef4d42d3 | 5:fb219dfdcc15 |
---|---|
15 } | 15 } |
16 library(gridExtra) | 16 library(gridExtra) |
17 if (!("ggplot2" %in% rownames(installed.packages()))) { | 17 if (!("ggplot2" %in% rownames(installed.packages()))) { |
18 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") | 18 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") |
19 } | 19 } |
20 require(ggplot2) | 20 library(ggplot2) |
21 if (!("plyr" %in% rownames(installed.packages()))) { | 21 if (!("plyr" %in% rownames(installed.packages()))) { |
22 install.packages("plyr", repos="http://cran.xl-mirror.nl/") | 22 install.packages("plyr", repos="http://cran.xl-mirror.nl/") |
23 } | 23 } |
24 require(plyr) | 24 library(plyr) |
25 | 25 |
26 if (!("data.table" %in% rownames(installed.packages()))) { | 26 if (!("data.table" %in% rownames(installed.packages()))) { |
27 install.packages("data.table", repos="http://cran.xl-mirror.nl/") | 27 install.packages("data.table", repos="http://cran.xl-mirror.nl/") |
28 } | 28 } |
29 library(data.table) | 29 library(data.table) |
36 | 36 |
37 test = read.table(inFile, sep="\t", header=TRUE, fill=T, comment.char="") | 37 test = read.table(inFile, sep="\t", header=TRUE, fill=T, comment.char="") |
38 | 38 |
39 test = test[test$Sample != "",] | 39 test = test[test$Sample != "",] |
40 | 40 |
41 | 41 print("test1\n") |
42 | 42 |
43 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) | 43 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) |
44 test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene) | 44 test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene) |
45 test$Top.J.Gene = gsub("[*]([0-9]+)", "", test$Top.J.Gene) | 45 test$Top.J.Gene = gsub("[*]([0-9]+)", "", test$Top.J.Gene) |
46 | 46 |
55 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ] | 55 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ] |
56 | 56 |
57 #PRODF = PROD[ -1] | 57 #PRODF = PROD[ -1] |
58 | 58 |
59 PRODF = PROD | 59 PRODF = PROD |
60 | 60 print("test2\n") |
61 #PRODF = unique(PRODF) | 61 #PRODF = unique(PRODF) |
62 if(any(grepl(pattern="_", x=PRODF$ID))){ | 62 if(any(grepl(pattern="_", x=PRODF$ID))){ #dumb and way to simple |
63 | |
64 PRODF$freq = gsub("^[0-9]+_", "", PRODF$ID) | 63 PRODF$freq = gsub("^[0-9]+_", "", PRODF$ID) |
65 PRODF$freq = gsub("_.*", "", PRODF$freq) | 64 PRODF$freq = gsub("_.*", "", PRODF$freq) |
66 PRODF$freq = as.numeric(PRODF$freq) | 65 PRODF$freq = as.numeric(PRODF$freq) |
66 if(any(is.na(PRODF$freq))){ #fix the dumbness if it fails | |
67 PRODF$freq = 1 | |
68 if(selection == "unique"){ | |
69 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] | |
70 } | |
71 } | |
67 } else { | 72 } else { |
68 PRODF$freq = 1 | 73 PRODF$freq = 1 |
69 if(selection == "unique"){ | 74 if(selection == "unique"){ |
70 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] | 75 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] |
71 } | 76 } |
93 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) | 98 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) |
94 | 99 |
95 V = c("v.name\tchr.orderV\n") | 100 V = c("v.name\tchr.orderV\n") |
96 D = c("v.name\tchr.orderD\n") | 101 D = c("v.name\tchr.orderD\n") |
97 J = c("v.name\tchr.orderJ\n") | 102 J = c("v.name\tchr.orderJ\n") |
103 | |
104 print("test3\n") | |
98 | 105 |
99 if(species == "human"){ | 106 if(species == "human"){ |
100 if(locus == "trb"){ | 107 if(locus == "trb"){ |
101 V = c("v.name\tchr.orderV\nTRBV2\t1\nTRBV3-1\t2\nTRBV4-1\t3\nTRBV5-1\t4\nTRBV6-1\t5\nTRBV4-2\t6\nTRBV6-2\t7\nTRBV4-3\t8\nTRBV6-3\t9\nTRBV7-2\t10\nTRBV6-4\t11\nTRBV7-3\t12\nTRBV9\t13\nTRBV10-1\t14\nTRBV11-1\t15\nTRBV10-2\t16\nTRBV11-2\t17\nTRBV6-5\t18\nTRBV7-4\t19\nTRBV5-4\t20\nTRBV6-6\t21\nTRBV5-5\t22\nTRBV7-6\t23\nTRBV5-6\t24\nTRBV6-8\t25\nTRBV7-7\t26\nTRBV6-9\t27\nTRBV7-8\t28\nTRBV5-8\t29\nTRBV7-9\t30\nTRBV13\t31\nTRBV10-3\t32\nTRBV11-3\t33\nTRBV12-3\t34\nTRBV12-4\t35\nTRBV12-5\t36\nTRBV14\t37\nTRBV15\t38\nTRBV16\t39\nTRBV18\t40\nTRBV19\t41\nTRBV20-1\t42\nTRBV24-1\t43\nTRBV25-1\t44\nTRBV27\t45\nTRBV28\t46\nTRBV29-1\t47\nTRBV30\t48") | 108 V = c("v.name\tchr.orderV\nTRBV2\t1\nTRBV3-1\t2\nTRBV4-1\t3\nTRBV5-1\t4\nTRBV6-1\t5\nTRBV4-2\t6\nTRBV6-2\t7\nTRBV4-3\t8\nTRBV6-3\t9\nTRBV7-2\t10\nTRBV6-4\t11\nTRBV7-3\t12\nTRBV9\t13\nTRBV10-1\t14\nTRBV11-1\t15\nTRBV10-2\t16\nTRBV11-2\t17\nTRBV6-5\t18\nTRBV7-4\t19\nTRBV5-4\t20\nTRBV6-6\t21\nTRBV5-5\t22\nTRBV7-6\t23\nTRBV5-6\t24\nTRBV6-8\t25\nTRBV7-7\t26\nTRBV6-9\t27\nTRBV7-8\t28\nTRBV5-8\t29\nTRBV7-9\t30\nTRBV13\t31\nTRBV10-3\t32\nTRBV11-3\t33\nTRBV12-3\t34\nTRBV12-4\t35\nTRBV12-5\t36\nTRBV14\t37\nTRBV15\t38\nTRBV16\t39\nTRBV18\t40\nTRBV19\t41\nTRBV20-1\t42\nTRBV24-1\t43\nTRBV25-1\t44\nTRBV27\t45\nTRBV28\t46\nTRBV29-1\t47\nTRBV30\t48") |
102 D = c("v.name\tchr.orderD\nTRBD1\t1\nTRBD2\t2\n") | 109 D = c("v.name\tchr.orderD\nTRBD1\t1\nTRBD2\t2\n") |
129 if(species == "human" && locus == "tra"){ | 136 if(species == "human" && locus == "tra"){ |
130 useD = FALSE | 137 useD = FALSE |
131 cat("No D Genes in this species/locus") | 138 cat("No D Genes in this species/locus") |
132 } | 139 } |
133 | 140 |
141 print("test4\n") | |
134 | 142 |
135 tcV = textConnection(V) | 143 tcV = textConnection(V) |
136 Vchain = read.table(tcV, sep="\t", header=TRUE) | 144 Vchain = read.table(tcV, sep="\t", header=TRUE) |
137 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE) | 145 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE) |
138 close(tcV) | 146 close(tcV) |
178 write.table(x=PRODFJ, file="JFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | 186 write.table(x=PRODFJ, file="JFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) |
179 | 187 |
180 png("JPlot.png",width = 800, height = 600) | 188 png("JPlot.png",width = 800, height = 600) |
181 pJ | 189 pJ |
182 dev.off(); | 190 dev.off(); |
191 | |
192 print("test5\n") | |
183 | 193 |
184 VGenes = PRODF[,c("Sample", "Top.V.Gene", "freq")] | 194 VGenes = PRODF[,c("Sample", "Top.V.Gene", "freq")] |
185 VGenes$Top.V.Gene = gsub("-.*", "", VGenes$Top.V.Gene) | 195 VGenes$Top.V.Gene = gsub("-.*", "", VGenes$Top.V.Gene) |
186 VGenes = data.frame(data.table(VGenes)[, list(Count=sum(freq)), by=c("Sample", "Top.V.Gene")]) | 196 VGenes = data.frame(data.table(VGenes)[, list(Count=sum(freq)), by=c("Sample", "Top.V.Gene")]) |
187 TotalPerSample = data.frame(data.table(VGenes)[, list(total=sum(.SD$Count)), by=Sample]) | 197 TotalPerSample = data.frame(data.table(VGenes)[, list(total=sum(.SD$Count)), by=Sample]) |
243 revVchain = Vchain | 253 revVchain = Vchain |
244 revDchain = Dchain | 254 revDchain = Dchain |
245 revVchain$chr.orderV = rev(revVchain$chr.orderV) | 255 revVchain$chr.orderV = rev(revVchain$chr.orderV) |
246 revDchain$chr.orderD = rev(revDchain$chr.orderD) | 256 revDchain$chr.orderD = rev(revDchain$chr.orderD) |
247 | 257 |
258 print("test6\n") | |
248 | 259 |
249 plotVD <- function(dat){ | 260 plotVD <- function(dat){ |
250 if(length(dat[,1]) == 0){ | 261 if(length(dat[,1]) == 0){ |
251 return() | 262 return() |
252 } | 263 } |
351 un = unique(test$Sample) | 362 un = unique(test$Sample) |
352 un = paste(un, sep="\n") | 363 un = paste(un, sep="\n") |
353 writeLines(un, sampleFile) | 364 writeLines(un, sampleFile) |
354 close(sampleFile) | 365 close(sampleFile) |
355 | 366 |
367 print("test7\n") | |
356 | 368 |
357 if("Replicate" %in% colnames(test)) | 369 if("Replicate" %in% colnames(test)) |
358 { | 370 { |
359 clonalityFrame = PROD | 371 clonalityFrame = PROD |
360 clonalityFrame$ReplicateConcat = do.call(paste, c(clonalityFrame[c("VDJCDR3", "Sample", "Replicate")], sep = ":")) | 372 clonalityFrame$ReplicateConcat = do.call(paste, c(clonalityFrame[c("VDJCDR3", "Sample", "Replicate")], sep = ":")) |
436 } | 448 } |
437 | 449 |
438 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) | 450 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) |
439 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) | 451 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) |
440 } | 452 } |
453 | |
454 print("test8\n") | |
441 | 455 |
442 if("Functionality" %in% colnames(test)) | 456 if("Functionality" %in% colnames(test)) |
443 { | 457 { |
444 newData = data.frame(data.table(PROD)[,list(unique=.N, | 458 newData = data.frame(data.table(PROD)[,list(unique=.N, |
445 VH.DEL=mean(X3V.REGION.trimmed.nt.nb), | 459 VH.DEL=mean(X3V.REGION.trimmed.nt.nb), |
465 mean(P3D.nt.nb) + | 479 mean(P3D.nt.nb) + |
466 mean(P5J.nt.nb))), | 480 mean(P5J.nt.nb))), |
467 by=c("Sample")]) | 481 by=c("Sample")]) |
468 write.table(newData, "junctionAnalysis.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 482 write.table(newData, "junctionAnalysis.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
469 } | 483 } |
484 | |
485 print("test9\n") |