Mercurial > repos > davidvanzessen > mutation_analysis
diff mutation_analysis.r @ 110:ade5cf6fd2dc draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 02 Aug 2016 08:30:23 -0400 |
parents | 5ffbf40cdd4b |
children | e7b550d52eb7 |
line wrap: on
line diff
--- a/mutation_analysis.r	Thu Jul 14 07:29:56 2016 -0400
+++ b/mutation_analysis.r	Tue Aug 02 08:30:23 2016 -0400
@@ -307,26 +307,28 @@
 	write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F)
 }
 
+sum.table = read.table("mutations_sum.txt", sep=",", header=F)
+median.table = read.table("mutations_median.txt", sep=",", header=F)
+
+#sum.table["Median of Number of Mutations (%)",] = median.table[1,]
+
+new.table = sum.table[1,]
+new.table[2,] = median.table[1,]
+new.table[3:12,] = sum.table[2:11,]
+new.table[,1] = as.character(new.table[,1])
+new.table[2,1] = "Median of Number of Mutations (%)"
+
+#sum.table = sum.table[c("Number of Mutations (%)", "Median of Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)", "Transitions at A T (%)", "Targeting of A T (%)", "FR R/S (ratio)", "CDR R/S (ratio)", "nt in FR", "nt in CDR"),]
+
+write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F)
+
+
 if (!("ggplot2" %in% rownames(installed.packages()))) {
 	install.packages("ggplot2", repos="http://cran.xl-mirror.nl/")
 }
 
 
-genesForPlot = gsub("[0-9]", "", dat$best_match)
-genesForPlot = data.frame(table(genesForPlot))
-colnames(genesForPlot) = c("Gene","Freq")
-genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
-write.table(genesForPlot, "all.txt", sep="\t",quote=F,row.names=F,col.names=T)
-
-
-pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label))
-pc = pc + geom_bar(width = 1, stat = "identity")
-pc = pc + coord_polar(theta="y")
-pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("Classes", "( n =", sum(genesForPlot$Freq), ")"))
-
-png(filename="all.png")
-pc
-dev.off()
+dat = dat[!grepl("^unmatched", dat$best_match),]
 
 #blegh
 genesForPlot = dat[grepl("ca", dat$best_match),]$best_match
@@ -377,11 +379,6 @@
 write.table(dat, input, sep="\t",quote=F,row.names=F,col.names=T)
 
 
-
-
-
-
-
 dat$best_match_class = substr(dat$best_match, 0, 2)
 freq_labels = c("0", "0-2", "2-5", "5-10", "10-15", "15-20", "20")
 dat$frequency_bins = cut(dat$percentage_mutations, breaks=c(-Inf, 0, 2,5,10,15,20, Inf), labels=freq_labels)