Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.r @ 26:2433a1e110e1 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Wed, 08 Apr 2015 05:25:52 -0400 |
| parents | 58a62d2c0377 |
| children | ac9a4307861a |
comparison
equal
deleted
inserted
replaced
| 25:58a62d2c0377 | 26:2433a1e110e1 |
|---|---|
| 146 dat$nonSilentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_nonSilentMutations_columns) | 146 dat$nonSilentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_nonSilentMutations_columns) |
| 147 | 147 |
| 148 | 148 |
| 149 setwd(outputdir) | 149 setwd(outputdir) |
| 150 | 150 |
| 151 nts = c("a", "t", "g", "c") | 151 nts = c("a", "c", "g", "t") |
| 152 zeros=rep(0, 4) | 152 zeros=rep(0, 4) |
| 153 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=7) | 153 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=7) |
| 154 for(i in 1:length(genes)){ | 154 for(i in 1:length(genes)){ |
| 155 gene = genes[i] | 155 gene = genes[i] |
| 156 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] | 156 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] |
| 296 | 296 |
| 297 genesForPlot = gsub("[0-9]", "", dat$best_match) | 297 genesForPlot = gsub("[0-9]", "", dat$best_match) |
| 298 genesForPlot = data.frame(table(genesForPlot)) | 298 genesForPlot = data.frame(table(genesForPlot)) |
| 299 colnames(genesForPlot) = c("Gene","Freq") | 299 colnames(genesForPlot) = c("Gene","Freq") |
| 300 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) | 300 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) |
| 301 write.table(genesForPlot, "all.txt", sep="\t",quote=F,row.names=F,col.names=T) | |
| 302 | |
| 301 | 303 |
| 302 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) | 304 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) |
| 303 pc = pc + geom_bar(width = 1, stat = "identity") | 305 pc = pc + geom_bar(width = 1, stat = "identity") |
| 304 pc = pc + coord_polar(theta="y") | 306 pc = pc + coord_polar(theta="y") |
| 305 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA", "( n =", sum(genesForPlot$Freq), ")")) | 307 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("Classes", "( n =", sum(genesForPlot$Freq), ")")) |
| 306 | 308 |
| 307 png(filename="all.png") | 309 png(filename="all.png") |
| 308 pc | 310 pc |
| 309 dev.off() | 311 dev.off() |
| 310 | 312 |
| 317 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) | 319 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) |
| 318 | 320 |
| 319 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) | 321 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) |
| 320 pc = pc + geom_bar(width = 1, stat = "identity") | 322 pc = pc + geom_bar(width = 1, stat = "identity") |
| 321 pc = pc + coord_polar(theta="y") | 323 pc = pc + coord_polar(theta="y") |
| 322 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA", "( n =", sum(genesForPlot$Freq), ")")) | 324 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA subclasses", "( n =", sum(genesForPlot$Freq), ")")) |
| 323 | 325 write.table(genesForPlot, "ca.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 324 | 326 |
| 325 png(filename="ca.png") | 327 png(filename="ca.png") |
| 326 print(pc) | 328 print(pc) |
| 327 dev.off() | 329 dev.off() |
| 328 } | 330 } |
| 334 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) | 336 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) |
| 335 | 337 |
| 336 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) | 338 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) |
| 337 pc = pc + geom_bar(width = 1, stat = "identity") | 339 pc = pc + geom_bar(width = 1, stat = "identity") |
| 338 pc = pc + coord_polar(theta="y") | 340 pc = pc + coord_polar(theta="y") |
| 339 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgG", "( n =", sum(genesForPlot$Freq), ")")) | 341 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgG subclasses", "( n =", sum(genesForPlot$Freq), ")")) |
| 340 | 342 write.table(genesForPlot, "cg.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 341 | 343 |
| 342 png(filename="cg.png") | 344 png(filename="cg.png") |
| 343 print(pc) | 345 print(pc) |
| 344 dev.off() | 346 dev.off() |
| 345 } | 347 } |
| 346 | 348 |
| 347 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2) | 349 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2) |
| 348 | 350 |
| 349 p = ggplot(dat, aes(best_match, percentage_mutations))# + scale_y_log10(breaks=scales,labels=scales) | 351 p = ggplot(dat, aes(best_match, percentage_mutations)) |
| 350 p = p + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) + geom_point(aes(colour=best_match), position="jitter") | 352 p = p + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) + geom_point(aes(colour=best_match), position="jitter") |
| 351 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") | 353 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") |
| 354 write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T) | |
| 355 | |
| 352 | 356 |
| 353 png(filename="scatter.png") | 357 png(filename="scatter.png") |
| 354 print(p) | 358 print(p) |
| 355 dev.off() | 359 dev.off() |
| 356 | 360 |
