Mercurial > repos > davidvanzessen > mutation_analysis
changeset 118:ad7ca9c2b748 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 11 Aug 2016 08:00:00 -0400 |
parents | a8f91c52411c |
children | 626a956f3811 |
files | mutation_analysis.r |
diffstat | 1 files changed, 22 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/mutation_analysis.r Wed Aug 10 09:10:45 2016 -0400
+++ b/mutation_analysis.r Thu Aug 11 08:00:00 2016 -0400
@@ -105,6 +105,8 @@
 	"FR2.IMGT.Nb.of.nonsilent.mutations",
 	"FR3.IMGT.Nb.of.nonsilent.mutations")
 
+
+print("Cleaning up columns")
 for(col in cleanup_columns){
 	dat[,col] = gsub("\\(.*\\)", "", dat[,col])
 	#dat[dat[,col] == "",] = "0"
@@ -119,6 +121,8 @@
 
 sum_by_row = function(x, columns) { sum(as.numeric(x[columns]), na.rm=T) }
 
+print("aggregating data into new columns")
+
 VRegionMutations_columns = paste(regions, ".IMGT.Nb.of.mutations", sep="")
 dat$VRegionMutations = apply(dat, FUN=sum_by_row, 1, columns=VRegionMutations_columns)
 
@@ -304,6 +308,8 @@
 funcs = c(median, sum, mean)
 fnames = c("median", "sum", "mean")
 
+print("Creating result tables")
+
 for(i in 1:length(funcs)){
 	func = funcs[[i]]
 	fname = fnames[[i]]
@@ -313,9 +319,10 @@
 		rows = 11
 	}
 	matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=rows)
-
+
 	for(i in 1:length(genes)){
-		matrx = calculate_result(i, genes[i], dat, matrx, func, fname, genes[i])
+		print(paste("Creating table for", fname, genes[i]))
+		matrx = calculate_result(i, genes[i], dat, matrx, func, fname, genes[i])
 	}
 
 	matrx = calculate_result(i + 1, ".*", dat[!grepl("unmatched", dat$best_match),], matrx, func, fname, name="all")
@@ -330,11 +337,11 @@
 	write.table(x=result, file=paste("mutations_", fname, ".txt", sep=""), sep=",",quote=F,row.names=T,col.names=F)
 }
 
+print("Adding median number of mutations to sum table")
+
 sum.table = read.table("mutations_sum.txt", sep=",", header=F)
 median.table = read.table("mutations_median.txt", sep=",", header=F)
 
-#sum.table["Median of Number of Mutations (%)",] = median.table[1,]
-
 new.table = sum.table[1,]
 new.table[2,] = median.table[1,]
 new.table[3:12,] = sum.table[2:11,]
@@ -345,6 +352,9 @@
 
 write.table(x=new.table, file="mutations_sum.txt", sep=",",quote=F,row.names=F,col.names=F)
 
+
+print("Plotting ca piechart")
+
 dat = dat[!grepl("^unmatched", dat$best_match),]
 
 #blegh
@@ -365,6 +375,8 @@
 	dev.off()
 }
 
+print("Plotting cg piechart")
+
 genesForPlot = dat[grepl("cg", dat$best_match),]$best_match
 if(length(genesForPlot) > 0){
 	genesForPlot = data.frame(table(genesForPlot))
@@ -382,6 +394,9 @@
 	dev.off()
 }
 
+
+print("Plotting scatterplot")
+
 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2)
 
 p = ggplot(dat, aes(best_match, percentage_mutations))
@@ -396,6 +411,9 @@
 
 write.table(dat, input, sep="\t",quote=F,row.names=F,col.names=T)
 
+
+print("Plotting frequency ranges plot")
+
 dat$best_match_class = substr(dat$best_match, 0, 2)
 freq_labels = c("0", "0-2", "2-5", "5-10", "10-15", "15-20", "20")
 dat$frequency_bins = cut(dat$percentage_mutations, breaks=c(-Inf, 0, 2,5,10,15,20, Inf), labels=freq_labels)