comparison RScript.r @ 3:b850200d4335 draft

Uploaded
author davidvanzessen
date Thu, 15 May 2014 09:30:54 -0400
parents fd1b76816395
children 0748ef4d42d3
comparison
equal deleted inserted replaced
2:fd1b76816395 3:b850200d4335
4 4
5 inFile = args[1] 5 inFile = args[1]
6 outFile = args[2] 6 outFile = args[2]
7 outDir = args[3] 7 outDir = args[3]
8 clonalType = args[4] 8 clonalType = args[4]
9 species = args[5]
10 locus = args[6]
11 selection = args[7]
9 12
10 if (!("gridExtra" %in% rownames(installed.packages()))) { 13 if (!("gridExtra" %in% rownames(installed.packages()))) {
11 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") 14 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/")
12 } 15 }
13 library(gridExtra) 16 library(gridExtra)
61 PRODF$freq = gsub("^[0-9]+_", "", PRODF$ID) 64 PRODF$freq = gsub("^[0-9]+_", "", PRODF$ID)
62 PRODF$freq = gsub("_.*", "", PRODF$freq) 65 PRODF$freq = gsub("_.*", "", PRODF$freq)
63 PRODF$freq = as.numeric(PRODF$freq) 66 PRODF$freq = as.numeric(PRODF$freq)
64 } else { 67 } else {
65 PRODF$freq = 1 68 PRODF$freq = 1
66 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] 69 if(selection == "unique"){
70 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ]
71 }
67 } 72 }
68 73
69 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")]) 74 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")])
70 PRODFV$Length = as.numeric(PRODFV$Length) 75 PRODFV$Length = as.numeric(PRODFV$Length)
71 Total = 0 76 Total = 0
85 Total = 0 90 Total = 0
86 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) 91 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length)))
87 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE) 92 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE)
88 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) 93 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total))
89 94
90 V = c("v.name\tchr.orderV\nTRBV1\t1\nTRBV2\t2\nTRBV3\t3\nTRBV4\t4\nTRBV5\t5\nTRBV12-1\t6\nTRBV13-1\t7\nTRBV12-2\t8\nTRBV13-2\t9\nTRBV13-3\t10\nTRBV14\t11\nTRBV15\t12\nTRBV16\t13\nTRBV17\t14\nTRBV19\t15\nTRBV20\t16\nTRBV23\t17\nTRBV24\t18\nTRBV26\t19\nTRBV29\t20\nTRBV30\t21\nTRBV31\t22\n") 95 V = c("v.name\tchr.orderV\n")
96 D = c("v.name\tchr.orderD\n")
97 J = c("v.name\tchr.orderJ\n")
98
99 if(species == "human"){
100 if(locus == "trb"){
101 V = c("v.name\tchr.orderV\nTRBV2\t1\nTRBV3-1\t2\nTRBV4-1\t3\nTRBV5-1\t4\nTRBV6-1\t5\nTRBV4-2\t6\nTRBV6-2\t7\nTRBV4-3\t8\nTRBV6-3\t9\nTRBV7-2\t10\nTRBV6-4\t11\nTRBV7-3\t12\nTRBV9\t13\nTRBV10-1\t14\nTRBV11-1\t15\nTRBV10-2\t16\nTRBV11-2\t17\nTRBV6-5\t18\nTRBV7-4\t19\nTRBV5-4\t20\nTRBV6-6\t21\nTRBV5-5\t22\nTRBV7-6\t23\nTRBV5-6\t24\nTRBV6-8\t25\nTRBV7-7\t26\nTRBV6-9\t27\nTRBV7-8\t28\nTRBV5-8\t29\nTRBV7-9\t30\nTRBV13\t31\nTRBV10-3\t32\nTRBV11-3\t33\nTRBV12-3\t34\nTRBV12-4\t35\nTRBV12-5\t36\nTRBV14\t37\nTRBV15\t38\nTRBV16\t39\nTRBV18\t40\nTRBV19\t41\nTRBV20-1\t42\nTRBV24-1\t43\nTRBV25-1\t44\nTRBV27\t45\nTRBV28\t46\nTRBV29-1\t47\nTRBV30\t48")
102 D = c("v.name\tchr.orderD\nTRBD1\t1\nTRBD2\t2\n")
103 J = c("v.name\tchr.orderJ\nTRBJ1-1\t1\nTRBJ1-2\t2\nTRBJ1-3\t3\nTRBJ1-4\t4\nTRBJ1-5\t5\nTRBJ1-6\t6\nTRBJ2-1\t7\nTRBJ2-2\t8\nTRBJ2-3\t9\nTRBJ2-4\t10\nTRBJ2-5\t11\nTRBJ2-6\t12\nTRBJ2-7\t13")
104 } else if (locus == "tra"){
105 V = c("v.name\tchr.orderVTRAV1-1\t1\nTRAV1-2\t2\nTRAV2\t3\nTRAV3\t4\nTRAV4\t5\nTRAV5\t6\nTRAV6\t7\nTRAV7\t8\nTRAV8-1\t9\nTRAV9-1\t10\nTRAV10\t11\nTRAV12-1\t12\nTRAV8-2\t13\nTRAV8-3\t14\nTRAV13-1\t15\nTRAV12-2\t16\nTRAV8-4\t17\nTRAV13-2\t18\nTRAV14/DV4\t19\nTRAV9-2\t20\nTRAV12-3\t21\nTRAV8-6\t22\nTRAV16\t23\nTRAV17\t24\nTRAV18\t25\nTRAV19\t26\nTRAV20\t27\nTRAV21\t28\nTRAV22\t29\nTRAV23/DV6\t30\nTRAV24\t31\nTRAV25\t32\nTRAV26-1\t33\nTRAV27\t34\nTRAV29/DV5\t35\nTRAV30\t36\nTRAV26-2\t37\nTRAV34\t38\nTRAV35\t39\nTRAV36/DV7\t40\nTRAV38-1\t41\nTRAV38-2/DV8\t42\nTRAV39\t43\nTRAV40\t44\nTRAV41\t45\n")
106 D = c("v.name\tchr.orderD\n")
107 J = c("v.name\tchr.orderJ\nTRAJ57\t1\nTRAJ56\t2\nTRAJ54\t3\nTRAJ53\t4\nTRAJ52\t5\nTRAJ50\t6\nTRAJ49\t7\nTRAJ48\t8\nTRAJ47\t9\nTRAJ46\t10\nTRAJ45\t11\nTRAJ44\t12\nTRAJ43\t13\nTRAJ42\t14\nTRAJ41\t15\nTRAJ40\t16\nTRAJ39\t17\nTRAJ38\t18\nTRAJ37\t19\nTRAJ36\t20\nTRAJ34\t21\nTRAJ33\t22\nTRAJ32\t23\nTRAJ31\t24\nTRAJ30\t25\nTRAJ29\t26\nTRAJ28\t27\nTRAJ27\t28\nTRAJ26\t29\nTRAJ24\t30\nTRAJ23\t31\nTRAJ22\t32\nTRAJ21\t33\nTRAJ20\t34\nTRAJ18\t35\nTRAJ17\t36\nTRAJ16\t37\nTRAJ15\t38\nTRAJ14\t39\nTRAJ13\t40\nTRAJ12\t41\nTRAJ11\t42\nTRAJ10\t43\nTRAJ9\t44\nTRAJ8\t45\nTRAJ7\t46\nTRAJ6\t47\nTRAJ5\t48\nTRAJ4\t49\nTRAJ3\t50")
108 } else if (locus == "trg"){
109 cat("human trg not yet implemented")
110 } else if (locus == "trd"){
111 cat("human trd not yet implemented")
112 }
113 } else if (species == "mouse"){
114 if(locus == "trb"){
115 cat("mouse trb not yet implemented")
116 } else if (locus == "tra"){
117 cat("mouse tra not yet implemented")
118 } else if (locus == "trg"){
119 cat("mouse trg not yet implemented")
120 } else if (locus == "trd"){
121 cat("mouse trd not yet implemented")
122 }
123 }
124 useD = TRUE
125 if(species == "human" && locus == "tra"){
126 useD = FALSE
127 cat("No D Genes in this species/locus")
128 }
129
130
91 tcV = textConnection(V) 131 tcV = textConnection(V)
92 Vchain = read.table(tcV, sep="\t", header=TRUE) 132 Vchain = read.table(tcV, sep="\t", header=TRUE)
93 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE) 133 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE)
94 close(tcV) 134 close(tcV)
95 135
96 D = c("v.name\tchr.orderD\nTRBD1\t1\nTRBD2\t2\n") 136
97 tcD = textConnection(D) 137 tcD = textConnection(D)
98 Dchain = read.table(tcD, sep="\t", header=TRUE) 138 Dchain = read.table(tcD, sep="\t", header=TRUE)
99 PRODFD = merge(PRODFD, Dchain, by.x='Top.D.Gene', by.y='v.name', all.x=TRUE) 139 PRODFD = merge(PRODFD, Dchain, by.x='Top.D.Gene', by.y='v.name', all.x=TRUE)
100 close(tcD) 140 close(tcD)
101 141
102 142
103 J = c("v.name\tchr.orderJ\nTRBJ1-1\t1\nTRBJ1-2\t2\nTRBJ1-3\t3\nTRBJ1-4\t4\nTRBJ1-5\t5\nTRBJ2-1\t6\nTRBJ2-2\t7\nTRBJ2-3\t8\nTRBJ2-4\t9\nTRBJ2-5\t10\nTRBJ2-6\t11\nTRBJ2-7\t12\n") 143
104 tcJ = textConnection(J) 144 tcJ = textConnection(J)
105 Jchain = read.table(tcJ, sep="\t", header=TRUE) 145 Jchain = read.table(tcJ, sep="\t", header=TRUE)
106 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) 146 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE)
107 close(tcJ) 147 close(tcJ)
108 148
199 revVchain = Vchain 239 revVchain = Vchain
200 revDchain = Dchain 240 revDchain = Dchain
201 revVchain$chr.orderV = rev(revVchain$chr.orderV) 241 revVchain$chr.orderV = rev(revVchain$chr.orderV)
202 revDchain$chr.orderD = rev(revDchain$chr.orderD) 242 revDchain$chr.orderD = rev(revDchain$chr.orderD)
203 243
244
204 plotVD <- function(dat){ 245 plotVD <- function(dat){
205 if(length(dat[,1]) == 0){ 246 if(length(dat[,1]) == 0){
206 return() 247 return()
207 } 248 }
208 img = ggplot() + 249 img = ggplot() +
234 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) 275 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE)
235 VDList = split(completeVD, f=completeVD[,"Sample"]) 276 VDList = split(completeVD, f=completeVD[,"Sample"])
236 277
237 lapply(VDList, FUN=plotVD) 278 lapply(VDList, FUN=plotVD)
238 279
239
240
241 plotVJ <- function(dat){ 280 plotVJ <- function(dat){
242 if(length(dat[,1]) == 0){ 281 if(length(dat[,1]) == 0){
243 return() 282 return()
244 } 283 }
245 img = ggplot() + 284 img = ggplot() +
301 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) 340 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE)
302 completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) 341 completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE)
303 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) 342 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE)
304 DJList = split(completeDJ, f=completeDJ[,"Sample"]) 343 DJList = split(completeDJ, f=completeDJ[,"Sample"])
305 lapply(DJList, FUN=plotDJ) 344 lapply(DJList, FUN=plotDJ)
306
307 345
308 sampleFile <- file("samples.txt") 346 sampleFile <- file("samples.txt")
309 un = unique(test$Sample) 347 un = unique(test$Sample)
310 un = paste(un, sep="\n") 348 un = paste(un, sep="\n")
311 writeLines(un, sampleFile) 349 writeLines(un, sampleFile)