Mercurial > repos > davidvanzessen > mutation_analysis
comparison aa_histogram.r @ 110:ade5cf6fd2dc draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Tue, 02 Aug 2016 08:30:23 -0400 |
| parents | 0096cd454380 |
| children | |
comparison
equal
deleted
inserted
replaced
| 109:0096cd454380 | 110:ade5cf6fd2dc |
|---|---|
| 13 | 13 |
| 14 mutations.by.id = read.table(mutations.by.id.file, sep="\t", fill=T, header=T, quote="") | 14 mutations.by.id = read.table(mutations.by.id.file, sep="\t", fill=T, header=T, quote="") |
| 15 absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="") | 15 absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="") |
| 16 | 16 |
| 17 for(gene in genes){ | 17 for(gene in genes){ |
| 18 | |
| 19 if(gene == ""){ | |
| 20 mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),] | |
| 21 absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),] | |
| 22 } else { | |
| 23 mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),] | |
| 24 absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),] | |
| 25 } | |
| 26 if(nrow(mutations.by.id.gene) == 0){ | |
| 27 next | |
| 28 } | |
| 29 | |
| 30 print(paste("nrow", gene, nrow(absent.aa.by.id.gene))) | |
| 31 | |
| 32 mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)]) | |
| 33 aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)]) | |
| 34 | 18 |
| 35 dat_freq = mutations.at.position / aa.at.position | 19 if(gene == ""){ |
| 36 dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq) | 20 mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),] |
| | 21 absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),] |
| | 22 } else { |
| | 23 mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),] |
| | 24 absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),] |
| | 25 } |
| | 26 print(paste("nrow", gene, nrow(absent.aa.by.id.gene))) |
| | 27 if(nrow(mutations.by.id.gene) == 0){ |
| | 28 next |
| | 29 } |
| 37 | 30 |
| 38 print("---------------- plot ----------------") | 31 mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)]) |
| | 32 aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)]) |
| 39 | 33 |
| 40 m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 34 dat_freq = mutations.at.position / aa.at.position |
| 41 m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=1:length(dat_freq), labels=1:length(dat_freq)) | 35 dat_freq[is.na(dat_freq)] = 0 |
| 42 m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1") | 36 dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq) |
| 43 m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1") | |
| 44 m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2") | |
| 45 m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2") | |
| 46 m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3") | |
| 47 m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(paste(gene, "AA mutation frequency")) | |
| 48 | 37 |
| 49 print("---------------- write/print ----------------") | 38 print("---------------- plot ----------------") |
| 50 | 39 |
| 51 png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720) | 40 m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
| 52 print(m) | 41 m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=dat_dt$i, labels=dat_dt$i) |
| 53 dev.off() | 42 m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1") |
| 54 | 43 m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1") |
| 55 dat.sums = data.frame(index=1:length(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position) | 44 m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2") |
| 56 | 45 m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2") |
| 57 write.table(dat.sums, paste(outdir, "/aa_histogram_sum_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 46 m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3") |
| 58 write.table(mutations.by.id.gene, paste(outdir, "/aa_histogram_count_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 47 m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(paste(gene, "AA mutation frequency")) |
| 59 write.table(absent.aa.by.id.gene, paste(outdir, "/aa_histogram_absent_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 48 |
| 60 write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 49 print("---------------- write/print ----------------") |
| | 50 |
| | 51 png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720) |
| | 52 print(m) |
| | 53 dev.off() |
| | 54 |
| | 55 dat.sums = data.frame(index=1:length(mutations.at.position), mutations.at.position=mutations.at.position, aa.at.position=aa.at.position) |
| | 56 |
| | 57 write.table(dat.sums, paste(outdir, "/aa_histogram_sum_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
| | 58 write.table(mutations.by.id.gene, paste(outdir, "/aa_histogram_count_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
| | 59 write.table(absent.aa.by.id.gene, paste(outdir, "/aa_histogram_absent_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
| | 60 write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
| 61 } | 61 } |
