comparison mutation_analysis.r @ 26:2433a1e110e1 draft

Uploaded
author davidvanzessen
date Wed, 08 Apr 2015 05:25:52 -0400
parents 58a62d2c0377
children ac9a4307861a
comparison
equal deleted inserted replaced
25:58a62d2c0377 26:2433a1e110e1
146 dat$nonSilentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_nonSilentMutations_columns) 146 dat$nonSilentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_nonSilentMutations_columns)
147 147
148 148
149 setwd(outputdir) 149 setwd(outputdir)
150 150
151 nts = c("a", "t", "g", "c") 151 nts = c("a", "c", "g", "t")
152 zeros=rep(0, 4) 152 zeros=rep(0, 4)
153 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=7) 153 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=7)
154 for(i in 1:length(genes)){ 154 for(i in 1:length(genes)){
155 gene = genes[i] 155 gene = genes[i]
156 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] 156 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),]
296 296
297 genesForPlot = gsub("[0-9]", "", dat$best_match) 297 genesForPlot = gsub("[0-9]", "", dat$best_match)
298 genesForPlot = data.frame(table(genesForPlot)) 298 genesForPlot = data.frame(table(genesForPlot))
299 colnames(genesForPlot) = c("Gene","Freq") 299 colnames(genesForPlot) = c("Gene","Freq")
300 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) 300 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
301 write.table(genesForPlot, "all.txt", sep="\t",quote=F,row.names=F,col.names=T)
302
301 303
302 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) 304 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label))
303 pc = pc + geom_bar(width = 1, stat = "identity") 305 pc = pc + geom_bar(width = 1, stat = "identity")
304 pc = pc + coord_polar(theta="y") 306 pc = pc + coord_polar(theta="y")
305 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA", "( n =", sum(genesForPlot$Freq), ")")) 307 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("Classes", "( n =", sum(genesForPlot$Freq), ")"))
306 308
307 png(filename="all.png") 309 png(filename="all.png")
308 pc 310 pc
309 dev.off() 311 dev.off()
310 312
317 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) 319 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
318 320
319 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) 321 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label))
320 pc = pc + geom_bar(width = 1, stat = "identity") 322 pc = pc + geom_bar(width = 1, stat = "identity")
321 pc = pc + coord_polar(theta="y") 323 pc = pc + coord_polar(theta="y")
322 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA", "( n =", sum(genesForPlot$Freq), ")")) 324 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA subclasses", "( n =", sum(genesForPlot$Freq), ")"))
323 325 write.table(genesForPlot, "ca.txt", sep="\t",quote=F,row.names=F,col.names=T)
324 326
325 png(filename="ca.png") 327 png(filename="ca.png")
326 print(pc) 328 print(pc)
327 dev.off() 329 dev.off()
328 } 330 }
334 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) 336 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
335 337
336 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) 338 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label))
337 pc = pc + geom_bar(width = 1, stat = "identity") 339 pc = pc + geom_bar(width = 1, stat = "identity")
338 pc = pc + coord_polar(theta="y") 340 pc = pc + coord_polar(theta="y")
339 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgG", "( n =", sum(genesForPlot$Freq), ")")) 341 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgG subclasses", "( n =", sum(genesForPlot$Freq), ")"))
340 342 write.table(genesForPlot, "cg.txt", sep="\t",quote=F,row.names=F,col.names=T)
341 343
342 png(filename="cg.png") 344 png(filename="cg.png")
343 print(pc) 345 print(pc)
344 dev.off() 346 dev.off()
345 } 347 }
346 348
347 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2) 349 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2)
348 350
349 p = ggplot(dat, aes(best_match, percentage_mutations))# + scale_y_log10(breaks=scales,labels=scales) 351 p = ggplot(dat, aes(best_match, percentage_mutations))
350 p = p + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) + geom_point(aes(colour=best_match), position="jitter") 352 p = p + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) + geom_point(aes(colour=best_match), position="jitter")
351 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") 353 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot")
354 write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T)
355
352 356
353 png(filename="scatter.png") 357 png(filename="scatter.png")
354 print(p) 358 print(p)
355 dev.off() 359 dev.off()
356 360