Mercurial > repos > davidvanzessen > mutation_analysis
diff aa_histogram.r @ 107:01c9993865af draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 13 Jul 2016 08:04:13 -0400 |
parents | e4957ad476a2 |
children | 6add3e66f4fa |
line wrap: on
line diff
--- a/aa_histogram.r Wed Jun 29 05:13:25 2016 -0400 +++ b/aa_histogram.r Wed Jul 13 08:04:13 2016 -0400 @@ -2,38 +2,52 @@ args <- commandArgs(trailingOnly = TRUE) -input = args[1] -outfile = args[2] -gene = args[3] +mutations.by.id.file = args[1] +absent.aa.by.id.file = args[2] +genes = strsplit(args[3], ",")[[1]] +genes = c(genes, "") +outdir = args[4] + print("---------------- read input ----------------") -dat = read.table(input, sep="\t", fill=T, header=T, quote="") - -print("---------------- as numeric ----------------") - -mutations.at.position = as.numeric(dat[1,]) -aa.at.position = as.numeric(dat[2,]) +mutations.by.id = read.table(mutations.by.id.file, sep="\t", fill=T, header=T, quote="") +absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="") -print("---------------- freq data.frame ----------------") +for(gene in genes){ + + if(gene == ""){ + mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),] + absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),] + } else { + mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),] + absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),] + } + if(nrow(mutations.by.id.gene) == 0){ + next + } -dat_freq = mutations.at.position / aa.at.position -dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq) - -print("---------------- plot ----------------") + mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)]) + aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)]) -m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) -m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=1:length(dat_freq), labels=1:length(dat_freq)) -m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1") -m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1") -m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2") -m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2") -m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3") -m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(paste(gene, "AA mutation frequency")) + dat_freq = mutations.at.position / aa.at.position + dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq) + + print("---------------- plot ----------------") -print("---------------- write/print ----------------") + m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=1:length(dat_freq), labels=1:length(dat_freq)) + m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1") + m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1") + m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2") + m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2") + m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3") + m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(paste(gene, "AA mutation frequency")) -write.table(dat_dt, paste(dirname(outfile), "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) -png(filename=outfile, width=1280, height=720) -print(m) -dev.off() + print("---------------- write/print ----------------") + + write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) + png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720) + print(m) + dev.off() +}