Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.r @ 110:ade5cf6fd2dc draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Tue, 02 Aug 2016 08:30:23 -0400 |
| parents | 5ffbf40cdd4b |
| children | e7b550d52eb7 |
comparison
equal
deleted
inserted
replaced
| 109:0096cd454380 | 110:ade5cf6fd2dc |
|---|---|
| 305 } | 305 } |
| 306 | 306 |
| 307 write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F) | 307 write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F) |
| 308 } | 308 } |
| 309 | 309 |
| 310 sum.table = read.table("mutations_sum.txt", sep=",", header=F) | |
| 311 median.table = read.table("mutations_median.txt", sep=",", header=F) | |
| 312 | |
| 313 #sum.table["Median of Number of Mutations (%)",] = median.table[1,] | |
| 314 | |
| 315 new.table = sum.table[1,] | |
| 316 new.table[2,] = median.table[1,] | |
| 317 new.table[3:12,] = sum.table[2:11,] | |
| 318 new.table[,1] = as.character(new.table[,1]) | |
| 319 new.table[2,1] = "Median of Number of Mutations (%)" | |
| 320 | |
| 321 #sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),] | |
| 322 | |
| 323 write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F) | |
| 324 | |
| 325 | |
| 310 | 326 |
| 311 if (!("ggplot2" %in% rownames(installed.packages()))) { | 327 if (!("ggplot2" %in% rownames(installed.packages()))) { |
| 312 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") | 328 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") |
| 313 } | 329 } |
| 314 | 330 |
| 315 genesForPlot = gsub("[0-9]", "", dat$best_match) | 331 dat = dat[!grepl("^unmatched", dat$best_match),] |
| 316 genesForPlot = data.frame(table(genesForPlot)) | |
| 317 colnames(genesForPlot) = c("Gene","Freq") | |
| 318 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) | |
| 319 write.table(genesForPlot, "all.txt", sep="\t",quote=F,row.names=F,col.names=T) | |
| 320 | |
| 321 | |
| 322 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) | |
| 323 pc = pc + geom_bar(width = 1, stat = "identity") | |
| 324 pc = pc + coord_polar(theta="y") | |
| 325 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("Classes", "( n =", sum(genesForPlot$Freq), ")")) | |
| 326 | |
| 327 png(filename="all.png") | |
| 328 pc | |
| 329 dev.off() | |
| 330 | 332 |
| 331 #blegh | 333 #blegh |
| 332 genesForPlot = dat[grepl("ca", dat$best_match),]$best_match | 334 genesForPlot = dat[grepl("ca", dat$best_match),]$best_match |
| 333 if(length(genesForPlot) > 0){ | 335 if(length(genesForPlot) > 0){ |
| 334 genesForPlot = data.frame(table(genesForPlot)) | 336 genesForPlot = data.frame(table(genesForPlot)) |
| 375 | 377 |
| 376 write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T) | 378 write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 377 | 379 |
| 378 write.table(dat, input, sep="\t",quote=F,row.names=F,col.names=T) | 380 write.table(dat, input, sep="\t",quote=F,row.names=F,col.names=T) |
| 379 | 381 |
| 380 | |
| 381 | |
| 382 | |
| 383 | |
| 384 | |
| 385 dat$best_match_class = substr(dat$best_match, 0, 2) | 382 dat$best_match_class = substr(dat$best_match, 0, 2) |
| 386 freq_labels = c("0", "0-2", "2-5", "5-10", "10-15", "15-20", "20") | 383 freq_labels = c("0", "0-2", "2-5", "5-10", "10-15", "15-20", "20") |
| 387 dat$frequency_bins = cut(dat$percentage_mutations, breaks=c(-Inf, 0, 2,5,10,15,20, Inf), labels=freq_labels) | 384 dat$frequency_bins = cut(dat$percentage_mutations, breaks=c(-Inf, 0, 2,5,10,15,20, Inf), labels=freq_labels) |
| 388 | 385 |
| 389 frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match_class", "frequency_bins")]) | 386 frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match_class", "frequency_bins")]) |
