Mercurial > repos > davidvanzessen > report_clonality_tcell_igg
comparison RScript.r @ 3:b850200d4335 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 15 May 2014 09:30:54 -0400 |
parents | fd1b76816395 |
children | 0748ef4d42d3 |
comparison
equal
deleted
inserted
replaced
2:fd1b76816395 | 3:b850200d4335 |
---|---|
4 | 4 |
5 inFile = args[1] | 5 inFile = args[1] |
6 outFile = args[2] | 6 outFile = args[2] |
7 outDir = args[3] | 7 outDir = args[3] |
8 clonalType = args[4] | 8 clonalType = args[4] |
9 species = args[5] | |
10 locus = args[6] | |
11 selection = args[7] | |
9 | 12 |
10 if (!("gridExtra" %in% rownames(installed.packages()))) { | 13 if (!("gridExtra" %in% rownames(installed.packages()))) { |
11 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") | 14 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") |
12 } | 15 } |
13 library(gridExtra) | 16 library(gridExtra) |
61 PRODF$freq = gsub("^[0-9]+_", "", PRODF$ID) | 64 PRODF$freq = gsub("^[0-9]+_", "", PRODF$ID) |
62 PRODF$freq = gsub("_.*", "", PRODF$freq) | 65 PRODF$freq = gsub("_.*", "", PRODF$freq) |
63 PRODF$freq = as.numeric(PRODF$freq) | 66 PRODF$freq = as.numeric(PRODF$freq) |
64 } else { | 67 } else { |
65 PRODF$freq = 1 | 68 PRODF$freq = 1 |
66 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] | 69 if(selection == "unique"){ |
70 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] | |
71 } | |
67 } | 72 } |
68 | 73 |
69 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")]) | 74 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")]) |
70 PRODFV$Length = as.numeric(PRODFV$Length) | 75 PRODFV$Length = as.numeric(PRODFV$Length) |
71 Total = 0 | 76 Total = 0 |
85 Total = 0 | 90 Total = 0 |
86 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 91 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
87 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE) | 92 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE) |
88 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) | 93 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) |
89 | 94 |
90 V = c("v.name\tchr.orderV\nTRBV1\t1\nTRBV2\t2\nTRBV3\t3\nTRBV4\t4\nTRBV5\t5\nTRBV12-1\t6\nTRBV13-1\t7\nTRBV12-2\t8\nTRBV13-2\t9\nTRBV13-3\t10\nTRBV14\t11\nTRBV15\t12\nTRBV16\t13\nTRBV17\t14\nTRBV19\t15\nTRBV20\t16\nTRBV23\t17\nTRBV24\t18\nTRBV26\t19\nTRBV29\t20\nTRBV30\t21\nTRBV31\t22\n") | 95 V = c("v.name\tchr.orderV\n") |
96 D = c("v.name\tchr.orderD\n") | |
97 J = c("v.name\tchr.orderJ\n") | |
98 | |
99 if(species == "human"){ | |
100 if(locus == "trb"){ | |
101 V = c("v.name\tchr.orderV\nTRBV2\t1\nTRBV3-1\t2\nTRBV4-1\t3\nTRBV5-1\t4\nTRBV6-1\t5\nTRBV4-2\t6\nTRBV6-2\t7\nTRBV4-3\t8\nTRBV6-3\t9\nTRBV7-2\t10\nTRBV6-4\t11\nTRBV7-3\t12\nTRBV9\t13\nTRBV10-1\t14\nTRBV11-1\t15\nTRBV10-2\t16\nTRBV11-2\t17\nTRBV6-5\t18\nTRBV7-4\t19\nTRBV5-4\t20\nTRBV6-6\t21\nTRBV5-5\t22\nTRBV7-6\t23\nTRBV5-6\t24\nTRBV6-8\t25\nTRBV7-7\t26\nTRBV6-9\t27\nTRBV7-8\t28\nTRBV5-8\t29\nTRBV7-9\t30\nTRBV13\t31\nTRBV10-3\t32\nTRBV11-3\t33\nTRBV12-3\t34\nTRBV12-4\t35\nTRBV12-5\t36\nTRBV14\t37\nTRBV15\t38\nTRBV16\t39\nTRBV18\t40\nTRBV19\t41\nTRBV20-1\t42\nTRBV24-1\t43\nTRBV25-1\t44\nTRBV27\t45\nTRBV28\t46\nTRBV29-1\t47\nTRBV30\t48") | |
102 D = c("v.name\tchr.orderD\nTRBD1\t1\nTRBD2\t2\n") | |
103 J = c("v.name\tchr.orderJ\nTRBJ1-1\t1\nTRBJ1-2\t2\nTRBJ1-3\t3\nTRBJ1-4\t4\nTRBJ1-5\t5\nTRBJ1-6\t6\nTRBJ2-1\t7\nTRBJ2-2\t8\nTRBJ2-3\t9\nTRBJ2-4\t10\nTRBJ2-5\t11\nTRBJ2-6\t12\nTRBJ2-7\t13") | |
104 } else if (locus == "tra"){ | |
105 V = c("v.name\tchr.orderVTRAV1-1\t1\nTRAV1-2\t2\nTRAV2\t3\nTRAV3\t4\nTRAV4\t5\nTRAV5\t6\nTRAV6\t7\nTRAV7\t8\nTRAV8-1\t9\nTRAV9-1\t10\nTRAV10\t11\nTRAV12-1\t12\nTRAV8-2\t13\nTRAV8-3\t14\nTRAV13-1\t15\nTRAV12-2\t16\nTRAV8-4\t17\nTRAV13-2\t18\nTRAV14/DV4\t19\nTRAV9-2\t20\nTRAV12-3\t21\nTRAV8-6\t22\nTRAV16\t23\nTRAV17\t24\nTRAV18\t25\nTRAV19\t26\nTRAV20\t27\nTRAV21\t28\nTRAV22\t29\nTRAV23/DV6\t30\nTRAV24\t31\nTRAV25\t32\nTRAV26-1\t33\nTRAV27\t34\nTRAV29/DV5\t35\nTRAV30\t36\nTRAV26-2\t37\nTRAV34\t38\nTRAV35\t39\nTRAV36/DV7\t40\nTRAV38-1\t41\nTRAV38-2/DV8\t42\nTRAV39\t43\nTRAV40\t44\nTRAV41\t45\n") | |
106 D = c("v.name\tchr.orderD\n") | |
107 J = c("v.name\tchr.orderJ\nTRAJ57\t1\nTRAJ56\t2\nTRAJ54\t3\nTRAJ53\t4\nTRAJ52\t5\nTRAJ50\t6\nTRAJ49\t7\nTRAJ48\t8\nTRAJ47\t9\nTRAJ46\t10\nTRAJ45\t11\nTRAJ44\t12\nTRAJ43\t13\nTRAJ42\t14\nTRAJ41\t15\nTRAJ40\t16\nTRAJ39\t17\nTRAJ38\t18\nTRAJ37\t19\nTRAJ36\t20\nTRAJ34\t21\nTRAJ33\t22\nTRAJ32\t23\nTRAJ31\t24\nTRAJ30\t25\nTRAJ29\t26\nTRAJ28\t27\nTRAJ27\t28\nTRAJ26\t29\nTRAJ24\t30\nTRAJ23\t31\nTRAJ22\t32\nTRAJ21\t33\nTRAJ20\t34\nTRAJ18\t35\nTRAJ17\t36\nTRAJ16\t37\nTRAJ15\t38\nTRAJ14\t39\nTRAJ13\t40\nTRAJ12\t41\nTRAJ11\t42\nTRAJ10\t43\nTRAJ9\t44\nTRAJ8\t45\nTRAJ7\t46\nTRAJ6\t47\nTRAJ5\t48\nTRAJ4\t49\nTRAJ3\t50") | |
108 } else if (locus == "trg"){ | |
109 cat("human trg not yet implemented") | |
110 } else if (locus == "trd"){ | |
111 cat("human trd not yet implemented") | |
112 } | |
113 } else if (species == "mouse"){ | |
114 if(locus == "trb"){ | |
115 cat("mouse trb not yet implemented") | |
116 } else if (locus == "tra"){ | |
117 cat("mouse tra not yet implemented") | |
118 } else if (locus == "trg"){ | |
119 cat("mouse trg not yet implemented") | |
120 } else if (locus == "trd"){ | |
121 cat("mouse trd not yet implemented") | |
122 } | |
123 } | |
124 useD = TRUE | |
125 if(species == "human" && locus == "tra"){ | |
126 useD = FALSE | |
127 cat("No D Genes in this species/locus") | |
128 } | |
129 | |
130 | |
91 tcV = textConnection(V) | 131 tcV = textConnection(V) |
92 Vchain = read.table(tcV, sep="\t", header=TRUE) | 132 Vchain = read.table(tcV, sep="\t", header=TRUE) |
93 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE) | 133 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE) |
94 close(tcV) | 134 close(tcV) |
95 | 135 |
96 D = c("v.name\tchr.orderD\nTRBD1\t1\nTRBD2\t2\n") | 136 |
97 tcD = textConnection(D) | 137 tcD = textConnection(D) |
98 Dchain = read.table(tcD, sep="\t", header=TRUE) | 138 Dchain = read.table(tcD, sep="\t", header=TRUE) |
99 PRODFD = merge(PRODFD, Dchain, by.x='Top.D.Gene', by.y='v.name', all.x=TRUE) | 139 PRODFD = merge(PRODFD, Dchain, by.x='Top.D.Gene', by.y='v.name', all.x=TRUE) |
100 close(tcD) | 140 close(tcD) |
101 | 141 |
102 | 142 |
103 J = c("v.name\tchr.orderJ\nTRBJ1-1\t1\nTRBJ1-2\t2\nTRBJ1-3\t3\nTRBJ1-4\t4\nTRBJ1-5\t5\nTRBJ2-1\t6\nTRBJ2-2\t7\nTRBJ2-3\t8\nTRBJ2-4\t9\nTRBJ2-5\t10\nTRBJ2-6\t11\nTRBJ2-7\t12\n") | 143 |
104 tcJ = textConnection(J) | 144 tcJ = textConnection(J) |
105 Jchain = read.table(tcJ, sep="\t", header=TRUE) | 145 Jchain = read.table(tcJ, sep="\t", header=TRUE) |
106 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) | 146 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) |
107 close(tcJ) | 147 close(tcJ) |
108 | 148 |
199 revVchain = Vchain | 239 revVchain = Vchain |
200 revDchain = Dchain | 240 revDchain = Dchain |
201 revVchain$chr.orderV = rev(revVchain$chr.orderV) | 241 revVchain$chr.orderV = rev(revVchain$chr.orderV) |
202 revDchain$chr.orderD = rev(revDchain$chr.orderD) | 242 revDchain$chr.orderD = rev(revDchain$chr.orderD) |
203 | 243 |
244 | |
204 plotVD <- function(dat){ | 245 plotVD <- function(dat){ |
205 if(length(dat[,1]) == 0){ | 246 if(length(dat[,1]) == 0){ |
206 return() | 247 return() |
207 } | 248 } |
208 img = ggplot() + | 249 img = ggplot() + |
234 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | 275 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) |
235 VDList = split(completeVD, f=completeVD[,"Sample"]) | 276 VDList = split(completeVD, f=completeVD[,"Sample"]) |
236 | 277 |
237 lapply(VDList, FUN=plotVD) | 278 lapply(VDList, FUN=plotVD) |
238 | 279 |
239 | |
240 | |
241 plotVJ <- function(dat){ | 280 plotVJ <- function(dat){ |
242 if(length(dat[,1]) == 0){ | 281 if(length(dat[,1]) == 0){ |
243 return() | 282 return() |
244 } | 283 } |
245 img = ggplot() + | 284 img = ggplot() + |
301 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) | 340 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) |
302 completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | 341 completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) |
303 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) | 342 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) |
304 DJList = split(completeDJ, f=completeDJ[,"Sample"]) | 343 DJList = split(completeDJ, f=completeDJ[,"Sample"]) |
305 lapply(DJList, FUN=plotDJ) | 344 lapply(DJList, FUN=plotDJ) |
306 | |
307 | 345 |
308 sampleFile <- file("samples.txt") | 346 sampleFile <- file("samples.txt") |
309 un = unique(test$Sample) | 347 un = unique(test$Sample) |
310 un = paste(un, sep="\n") | 348 un = paste(un, sep="\n") |
311 writeLines(un, sampleFile) | 349 writeLines(un, sampleFile) |