Mercurial > repos > davidvanzessen > report_clonality_tcell_igg
comparison RScript.r @ 2:fd1b76816395 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 27 Mar 2014 10:54:03 -0400 |
parents | 1d429107cd26 |
children | b850200d4335 |
comparison
equal
deleted
inserted
replaced
1:1ba88ffd6f4e | 2:fd1b76816395 |
---|---|
4 | 4 |
5 inFile = args[1] | 5 inFile = args[1] |
6 outFile = args[2] | 6 outFile = args[2] |
7 outDir = args[3] | 7 outDir = args[3] |
8 clonalType = args[4] | 8 clonalType = args[4] |
9 species = args[5] | |
10 | 9 |
11 if (!("gridExtra" %in% rownames(installed.packages()))) { | 10 if (!("gridExtra" %in% rownames(installed.packages()))) { |
12 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") | 11 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") |
13 } | 12 } |
14 library(gridExtra) | 13 library(gridExtra) |
24 if (!("data.table" %in% rownames(installed.packages()))) { | 23 if (!("data.table" %in% rownames(installed.packages()))) { |
25 install.packages("data.table", repos="http://cran.xl-mirror.nl/") | 24 install.packages("data.table", repos="http://cran.xl-mirror.nl/") |
26 } | 25 } |
27 library(data.table) | 26 library(data.table) |
28 | 27 |
28 if (!("reshape2" %in% rownames(installed.packages()))) { | |
29 install.packages("reshape2", repos="http://cran.xl-mirror.nl/") | |
30 } | |
31 library(reshape2) | |
32 | |
29 | 33 |
30 test = read.table(inFile, sep="\t", header=TRUE, fill=T, comment.char="") | 34 test = read.table(inFile, sep="\t", header=TRUE, fill=T, comment.char="") |
31 | 35 |
32 test = test[test$Sample != "",] | 36 test = test[test$Sample != "",] |
37 | |
38 | |
33 | 39 |
34 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) | 40 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) |
35 test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene) | 41 test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene) |
36 test$Top.J.Gene = gsub("[*]([0-9]+)", "", test$Top.J.Gene) | 42 test$Top.J.Gene = gsub("[*]([0-9]+)", "", test$Top.J.Gene) |
37 | 43 |
48 #PRODF = PROD[ -1] | 54 #PRODF = PROD[ -1] |
49 | 55 |
50 PRODF = PROD | 56 PRODF = PROD |
51 | 57 |
52 #PRODF = unique(PRODF) | 58 #PRODF = unique(PRODF) |
53 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] | 59 if(any(grepl(pattern="_", x=PRODF$ID))){ |
54 | 60 |
55 PRODFV = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.V.Gene")]) | 61 PRODF$freq = gsub("^[0-9]+_", "", PRODF$ID) |
62 PRODF$freq = gsub("_.*", "", PRODF$freq) | |
63 PRODF$freq = as.numeric(PRODF$freq) | |
64 } else { | |
65 PRODF$freq = 1 | |
66 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] | |
67 } | |
68 | |
69 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")]) | |
56 PRODFV$Length = as.numeric(PRODFV$Length) | 70 PRODFV$Length = as.numeric(PRODFV$Length) |
57 Total = 0 | 71 Total = 0 |
58 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 72 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
59 PRODFV = merge(PRODFV, Total, by.x='Sample', by.y='Sample', all.x=TRUE) | 73 PRODFV = merge(PRODFV, Total, by.x='Sample', by.y='Sample', all.x=TRUE) |
60 PRODFV = ddply(PRODFV, c("Sample", "Top.V.Gene"), summarise, relFreq= (Length*100 / Total)) | 74 PRODFV = ddply(PRODFV, c("Sample", "Top.V.Gene"), summarise, relFreq= (Length*100 / Total)) |
61 | 75 |
62 PRODFD = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.D.Gene")]) | 76 PRODFD = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.D.Gene")]) |
63 PRODFD$Length = as.numeric(PRODFD$Length) | 77 PRODFD$Length = as.numeric(PRODFD$Length) |
64 Total = 0 | 78 Total = 0 |
65 Total = ddply(PRODFD, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 79 Total = ddply(PRODFD, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
66 PRODFD = merge(PRODFD, Total, by.x='Sample', by.y='Sample', all.x=TRUE) | 80 PRODFD = merge(PRODFD, Total, by.x='Sample', by.y='Sample', all.x=TRUE) |
67 PRODFD = ddply(PRODFD, c("Sample", "Top.D.Gene"), summarise, relFreq= (Length*100 / Total)) | 81 PRODFD = ddply(PRODFD, c("Sample", "Top.D.Gene"), summarise, relFreq= (Length*100 / Total)) |
68 | 82 |
69 PRODFJ = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.J.Gene")]) | 83 PRODFJ = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.J.Gene")]) |
70 PRODFJ$Length = as.numeric(PRODFJ$Length) | 84 PRODFJ$Length = as.numeric(PRODFJ$Length) |
71 Total = 0 | 85 Total = 0 |
72 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 86 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
73 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE) | 87 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE) |
74 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) | 88 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) |
92 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) | 106 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) |
93 close(tcJ) | 107 close(tcJ) |
94 | 108 |
95 setwd(outDir) | 109 setwd(outDir) |
96 | 110 |
97 write.table(PRODF, "allUnique.tsv", sep="\t",quote=F,row.names=F,col.names=T) | 111 write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T) |
98 | 112 |
99 pV = ggplot(PRODFV) | 113 pV = ggplot(PRODFV) |
100 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 114 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
101 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") | 115 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") |
116 write.table(x=PRODFV, file="VFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | |
102 | 117 |
103 png("VPlot.png",width = 1280, height = 720) | 118 png("VPlot.png",width = 1280, height = 720) |
104 pV | 119 pV |
105 dev.off(); | 120 dev.off(); |
106 | 121 |
107 pD = ggplot(PRODFD) | 122 pD = ggplot(PRODFD) |
108 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 123 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
109 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") | 124 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") |
125 write.table(x=PRODFD, file="DFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | |
110 | 126 |
111 png("DPlot.png",width = 800, height = 600) | 127 png("DPlot.png",width = 800, height = 600) |
112 pD | 128 pD |
113 dev.off(); | 129 dev.off(); |
114 | 130 |
115 pJ = ggplot(PRODFJ) | 131 pJ = ggplot(PRODFJ) |
116 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 132 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
117 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") | 133 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") |
134 write.table(x=PRODFJ, file="JFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | |
118 | 135 |
119 png("JPlot.png",width = 800, height = 600) | 136 png("JPlot.png",width = 800, height = 600) |
120 pJ | 137 pJ |
121 dev.off(); | 138 dev.off(); |
139 | |
140 VGenes = PRODF[,c("Sample", "Top.V.Gene", "freq")] | |
141 VGenes$Top.V.Gene = gsub("-.*", "", VGenes$Top.V.Gene) | |
142 VGenes = data.frame(data.table(VGenes)[, list(Count=sum(freq)), by=c("Sample", "Top.V.Gene")]) | |
143 TotalPerSample = data.frame(data.table(VGenes)[, list(total=sum(.SD$Count)), by=Sample]) | |
144 VGenes = merge(VGenes, TotalPerSample, by="Sample") | |
145 VGenes$Frequency = VGenes$Count * 100 / VGenes$total | |
146 VPlot = ggplot(VGenes) | |
147 VPlot = VPlot + geom_bar(aes( x = Top.V.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
148 ggtitle("Distribution of V gene families") + | |
149 ylab("Percentage of sequences") | |
150 png("VFPlot.png") | |
151 VPlot | |
152 dev.off(); | |
153 write.table(x=VGenes, file="VFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | |
154 | |
155 DGenes = PRODF[,c("Sample", "Top.D.Gene", "freq")] | |
156 DGenes$Top.D.Gene = gsub("-.*", "", DGenes$Top.D.Gene) | |
157 DGenes = data.frame(data.table(DGenes)[, list(Count=sum(freq)), by=c("Sample", "Top.D.Gene")]) | |
158 TotalPerSample = data.frame(data.table(DGenes)[, list(total=sum(.SD$Count)), by=Sample]) | |
159 DGenes = merge(DGenes, TotalPerSample, by="Sample") | |
160 DGenes$Frequency = DGenes$Count * 100 / DGenes$total | |
161 DPlot = ggplot(DGenes) | |
162 DPlot = DPlot + geom_bar(aes( x = Top.D.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
163 ggtitle("Distribution of D gene families") + | |
164 ylab("Percentage of sequences") | |
165 png("DFPlot.png") | |
166 DPlot | |
167 dev.off(); | |
168 write.table(x=DGenes, file="DFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | |
169 | |
170 JGenes = PRODF[,c("Sample", "Top.J.Gene", "freq")] | |
171 JGenes$Top.J.Gene = gsub("-.*", "", JGenes$Top.J.Gene) | |
172 JGenes = data.frame(data.table(JGenes)[, list(Count=sum(freq)), by=c("Sample", "Top.J.Gene")]) | |
173 TotalPerSample = data.frame(data.table(JGenes)[, list(total=sum(.SD$Count)), by=Sample]) | |
174 JGenes = merge(JGenes, TotalPerSample, by="Sample") | |
175 JGenes$Frequency = JGenes$Count * 100 / JGenes$total | |
176 JPlot = ggplot(JGenes) | |
177 JPlot = JPlot + geom_bar(aes( x = Top.J.Gene, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
178 ggtitle("Distribution of J gene families") + | |
179 ylab("Percentage of sequences") | |
180 png("JFPlot.png") | |
181 JPlot | |
182 dev.off(); | |
183 write.table(x=JGenes, file="JFFrequency.csv", sep=",",quote=F,row.names=F,col.names=T) | |
184 | |
185 CDR3Length = data.frame(data.table(PRODF)[, list(Count=sum(freq)), by=c("Sample", "CDR3.Length.DNA")]) | |
186 TotalPerSample = data.frame(data.table(CDR3Length)[, list(total=sum(.SD$Count)), by=Sample]) | |
187 CDR3Length = merge(CDR3Length, TotalPerSample, by="Sample") | |
188 CDR3Length$Frequency = CDR3Length$Count * 100 / CDR3Length$total | |
189 CDR3LengthPlot = ggplot(CDR3Length) | |
190 CDR3LengthPlot = CDR3LengthPlot + geom_bar(aes( x = CDR3.Length.DNA, y = Frequency, fill = Sample), stat='identity', position='dodge' ) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
191 ggtitle("Length distribution of CDR3") + | |
192 xlab("CDR3 Length") + | |
193 ylab("Percentage of sequences") | |
194 png("CDR3LengthPlot.png",width = 1280, height = 720) | |
195 CDR3LengthPlot | |
196 dev.off() | |
197 write.table(x=CDR3Length, file="CDR3LengthPlot.csv", sep=",",quote=F,row.names=F,col.names=T) | |
122 | 198 |
123 revVchain = Vchain | 199 revVchain = Vchain |
124 revDchain = Dchain | 200 revDchain = Dchain |
125 revVchain$chr.orderV = rev(revVchain$chr.orderV) | 201 revVchain$chr.orderV = rev(revVchain$chr.orderV) |
126 revDchain$chr.orderD = rev(revDchain$chr.orderD) | 202 revDchain$chr.orderD = rev(revDchain$chr.orderD) |
127 | |
128 cat("before VD", "\n") | |
129 | 203 |
130 plotVD <- function(dat){ | 204 plotVD <- function(dat){ |
131 if(length(dat[,1]) == 0){ | 205 if(length(dat[,1]) == 0){ |
132 return() | 206 return() |
133 } | 207 } |
139 xlab("D genes") + | 213 xlab("D genes") + |
140 ylab("V Genes") | 214 ylab("V Genes") |
141 | 215 |
142 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) | 216 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) |
143 print(img) | 217 print(img) |
218 | |
144 dev.off() | 219 dev.off() |
145 } | 220 write.table(x=acast(dat, Top.V.Gene~Top.D.Gene, value.var="Length"), file=paste("HeatmapVD_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) |
146 | 221 } |
147 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) | 222 |
223 VandDCount = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) | |
148 | 224 |
149 VandDCount$l = log(VandDCount$Length) | 225 VandDCount$l = log(VandDCount$Length) |
150 maxVD = data.frame(data.table(VandDCount)[, list(max=max(l)), by=c("Sample")]) | 226 maxVD = data.frame(data.table(VandDCount)[, list(max=max(l)), by=c("Sample")]) |
151 VandDCount = merge(VandDCount, maxVD, by.x="Sample", by.y="Sample", all.x=T) | 227 VandDCount = merge(VandDCount, maxVD, by.x="Sample", by.y="Sample", all.x=T) |
152 VandDCount$relLength = VandDCount$l / VandDCount$max | 228 VandDCount$relLength = VandDCount$l / VandDCount$max |
158 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | 234 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) |
159 VDList = split(completeVD, f=completeVD[,"Sample"]) | 235 VDList = split(completeVD, f=completeVD[,"Sample"]) |
160 | 236 |
161 lapply(VDList, FUN=plotVD) | 237 lapply(VDList, FUN=plotVD) |
162 | 238 |
163 cat("after VD", "\n") | 239 |
164 | |
165 cat("before VJ", "\n") | |
166 | 240 |
167 plotVJ <- function(dat){ | 241 plotVJ <- function(dat){ |
168 if(length(dat[,1]) == 0){ | 242 if(length(dat[,1]) == 0){ |
169 return() | 243 return() |
170 } | 244 } |
177 ylab("V Genes") | 251 ylab("V Genes") |
178 | 252 |
179 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) | 253 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) |
180 print(img) | 254 print(img) |
181 dev.off() | 255 dev.off() |
182 } | 256 write.table(x=acast(dat, Top.V.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapVJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) |
183 | 257 } |
184 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) | 258 |
259 VandJCount = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) | |
185 | 260 |
186 VandJCount$l = log(VandJCount$Length) | 261 VandJCount$l = log(VandJCount$Length) |
187 maxVJ = data.frame(data.table(VandJCount)[, list(max=max(l)), by=c("Sample")]) | 262 maxVJ = data.frame(data.table(VandJCount)[, list(max=max(l)), by=c("Sample")]) |
188 VandJCount = merge(VandJCount, maxVJ, by.x="Sample", by.y="Sample", all.x=T) | 263 VandJCount = merge(VandJCount, maxVJ, by.x="Sample", by.y="Sample", all.x=T) |
189 VandJCount$relLength = VandJCount$l / VandJCount$max | 264 VandJCount$relLength = VandJCount$l / VandJCount$max |
193 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) | 268 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) |
194 completeVJ = merge(completeVJ, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) | 269 completeVJ = merge(completeVJ, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) |
195 completeVJ = merge(completeVJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) | 270 completeVJ = merge(completeVJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) |
196 VJList = split(completeVJ, f=completeVJ[,"Sample"]) | 271 VJList = split(completeVJ, f=completeVJ[,"Sample"]) |
197 lapply(VJList, FUN=plotVJ) | 272 lapply(VJList, FUN=plotVJ) |
198 | |
199 cat("after VJ", "\n") | |
200 | |
201 cat("before DJ", "\n") | |
202 | 273 |
203 plotDJ <- function(dat){ | 274 plotDJ <- function(dat){ |
204 if(length(dat[,1]) == 0){ | 275 if(length(dat[,1]) == 0){ |
205 return() | 276 return() |
206 } | 277 } |
213 ylab("D Genes") | 284 ylab("D Genes") |
214 | 285 |
215 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) | 286 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) |
216 print(img) | 287 print(img) |
217 dev.off() | 288 dev.off() |
289 write.table(x=acast(dat, Top.D.Gene~Top.J.Gene, value.var="Length"), file=paste("HeatmapDJ_", unique(dat[3])[1,1], ".csv", sep=""), sep=",",quote=F,row.names=T,col.names=NA) | |
218 } | 290 } |
219 | 291 |
220 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) | 292 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) |
221 | 293 |
222 DandJCount$l = log(DandJCount$Length) | 294 DandJCount$l = log(DandJCount$Length) |
230 completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | 302 completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) |
231 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) | 303 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) |
232 DJList = split(completeDJ, f=completeDJ[,"Sample"]) | 304 DJList = split(completeDJ, f=completeDJ[,"Sample"]) |
233 lapply(DJList, FUN=plotDJ) | 305 lapply(DJList, FUN=plotDJ) |
234 | 306 |
235 cat("after DJ", "\n") | |
236 | 307 |
237 sampleFile <- file("samples.txt") | 308 sampleFile <- file("samples.txt") |
238 un = unique(test$Sample) | 309 un = unique(test$Sample) |
239 un = paste(un, sep="\n") | 310 un = paste(un, sep="\n") |
240 writeLines(un, sampleFile) | 311 writeLines(un, sampleFile) |
241 close(sampleFile) | 312 close(sampleFile) |
242 | 313 |
243 | 314 |
244 | |
245 if("Replicate" %in% colnames(test)) | 315 if("Replicate" %in% colnames(test)) |
246 { | 316 { |
247 clonalityFrame = PROD | 317 clonalityFrame = PROD |
248 clonalityFrame$ReplicateConcat = do.call(paste, c(clonalityFrame[c("VDJCDR3", "Sample", "Replicate")], sep = ":")) | 318 clonalityFrame$ReplicateConcat = do.call(paste, c(clonalityFrame[c("VDJCDR3", "Sample", "Replicate")], sep = ":")) |
249 clonalityFrame = clonalityFrame[!duplicated(clonalityFrame$ReplicateConcat), ] | 319 clonalityFrame = clonalityFrame[!duplicated(clonalityFrame$ReplicateConcat), ] |
250 write.table(clonalityFrame, "clonalityComplete.tsv", sep="\t",quote=F,row.names=F,col.names=T) | 320 write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T) |
251 | 321 |
252 ClonalitySampleReplicatePrint <- function(dat){ | 322 ClonalitySampleReplicatePrint <- function(dat){ |
253 write.table(dat, paste("clonality_", unique(dat$Sample) , "_", unique(dat$Replicate), ".tsv", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 323 write.table(dat, paste("clonality_", unique(dat$Sample) , "_", unique(dat$Replicate), ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) |
254 } | 324 } |
255 | 325 |
256 clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,c("Sample", "Replicate")]) | 326 clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,c("Sample", "Replicate")]) |
257 lapply(clonalityFrameSplit, FUN=ClonalitySampleReplicatePrint) | 327 #lapply(clonalityFrameSplit, FUN=ClonalitySampleReplicatePrint) |
258 | 328 |
259 ClonalitySamplePrint <- function(dat){ | 329 ClonalitySamplePrint <- function(dat){ |
260 write.table(dat, paste("clonality_", unique(dat$Sample) , ".tsv", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 330 write.table(dat, paste("clonality_", unique(dat$Sample) , ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) |
261 } | 331 } |
262 | 332 |
263 clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,"Sample"]) | 333 clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,"Sample"]) |
264 lapply(clonalityFrameSplit, FUN=ClonalitySamplePrint) | 334 #lapply(clonalityFrameSplit, FUN=ClonalitySamplePrint) |
265 | 335 |
266 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "VDJCDR3")]) | 336 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "VDJCDR3")]) |
267 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) | 337 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) |
268 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count | 338 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count |
269 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) | 339 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) |