Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.r @ 26:2433a1e110e1 draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 08 Apr 2015 05:25:52 -0400 |
parents | 58a62d2c0377 |
children | ac9a4307861a |
comparison
equal
deleted
inserted
replaced
25:58a62d2c0377 | 26:2433a1e110e1 |
---|---|
146 dat$nonSilentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_nonSilentMutations_columns) | 146 dat$nonSilentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_nonSilentMutations_columns) |
147 | 147 |
148 | 148 |
149 setwd(outputdir) | 149 setwd(outputdir) |
150 | 150 |
151 nts = c("a", "t", "g", "c") | 151 nts = c("a", "c", "g", "t") |
152 zeros=rep(0, 4) | 152 zeros=rep(0, 4) |
153 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=7) | 153 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=7) |
154 for(i in 1:length(genes)){ | 154 for(i in 1:length(genes)){ |
155 gene = genes[i] | 155 gene = genes[i] |
156 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] | 156 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] |
296 | 296 |
297 genesForPlot = gsub("[0-9]", "", dat$best_match) | 297 genesForPlot = gsub("[0-9]", "", dat$best_match) |
298 genesForPlot = data.frame(table(genesForPlot)) | 298 genesForPlot = data.frame(table(genesForPlot)) |
299 colnames(genesForPlot) = c("Gene","Freq") | 299 colnames(genesForPlot) = c("Gene","Freq") |
300 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) | 300 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) |
301 write.table(genesForPlot, "all.txt", sep="\t",quote=F,row.names=F,col.names=T) | |
302 | |
301 | 303 |
302 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) | 304 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) |
303 pc = pc + geom_bar(width = 1, stat = "identity") | 305 pc = pc + geom_bar(width = 1, stat = "identity") |
304 pc = pc + coord_polar(theta="y") | 306 pc = pc + coord_polar(theta="y") |
305 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA", "( n =", sum(genesForPlot$Freq), ")")) | 307 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("Classes", "( n =", sum(genesForPlot$Freq), ")")) |
306 | 308 |
307 png(filename="all.png") | 309 png(filename="all.png") |
308 pc | 310 pc |
309 dev.off() | 311 dev.off() |
310 | 312 |
317 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) | 319 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) |
318 | 320 |
319 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) | 321 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) |
320 pc = pc + geom_bar(width = 1, stat = "identity") | 322 pc = pc + geom_bar(width = 1, stat = "identity") |
321 pc = pc + coord_polar(theta="y") | 323 pc = pc + coord_polar(theta="y") |
322 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA", "( n =", sum(genesForPlot$Freq), ")")) | 324 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA subclasses", "( n =", sum(genesForPlot$Freq), ")")) |
323 | 325 write.table(genesForPlot, "ca.txt", sep="\t",quote=F,row.names=F,col.names=T) |
324 | 326 |
325 png(filename="ca.png") | 327 png(filename="ca.png") |
326 print(pc) | 328 print(pc) |
327 dev.off() | 329 dev.off() |
328 } | 330 } |
334 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) | 336 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) |
335 | 337 |
336 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) | 338 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) |
337 pc = pc + geom_bar(width = 1, stat = "identity") | 339 pc = pc + geom_bar(width = 1, stat = "identity") |
338 pc = pc + coord_polar(theta="y") | 340 pc = pc + coord_polar(theta="y") |
339 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgG", "( n =", sum(genesForPlot$Freq), ")")) | 341 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgG subclasses", "( n =", sum(genesForPlot$Freq), ")")) |
340 | 342 write.table(genesForPlot, "cg.txt", sep="\t",quote=F,row.names=F,col.names=T) |
341 | 343 |
342 png(filename="cg.png") | 344 png(filename="cg.png") |
343 print(pc) | 345 print(pc) |
344 dev.off() | 346 dev.off() |
345 } | 347 } |
346 | 348 |
347 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2) | 349 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2) |
348 | 350 |
349 p = ggplot(dat, aes(best_match, percentage_mutations))# + scale_y_log10(breaks=scales,labels=scales) | 351 p = ggplot(dat, aes(best_match, percentage_mutations)) |
350 p = p + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) + geom_point(aes(colour=best_match), position="jitter") | 352 p = p + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) + geom_point(aes(colour=best_match), position="jitter") |
351 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") | 353 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") |
354 write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T) | |
355 | |
352 | 356 |
353 png(filename="scatter.png") | 357 png(filename="scatter.png") |
354 print(p) | 358 print(p) |
355 dev.off() | 359 dev.off() |
356 | 360 |