annotate aa_histogram.r @ 109:0096cd454380 draft

Uploaded
author davidvanzessen
date Thu, 14 Jul 2016 07:29:56 -0400
parents 6add3e66f4fa
children ade5cf6fd2dc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
26
2433a1e110e1 Uploaded
davidvanzessen
parents:
diff changeset
# Plot the per-position amino-acid (AA) mutation frequency for each requested
# gene and write the underlying tables next to the plots.
#
# Usage:
#   Rscript aa_histogram.r <mutations.by.id.file> <absent.aa.by.id.file> \
#                          <genes> <outdir>
#
# Arguments:
#   mutations.by.id.file  Tab-separated table with a header row and a
#                         `best_match` column. Every column after the first
#                         two is summed per gene.
#   absent.aa.by.id.file  Tab-separated table with a header row and a
#                         `best_match` column. Every column after the first
#                         four is summed per gene.
#   genes                 Comma-separated gene name prefixes; each is matched
#                         against the start of `best_match`.
#   outdir                Directory receiving the output files.
#
# Output, once per gene (and once with an empty gene name for all rows whose
# `best_match` does not contain "unmatched"):
#   aa_histogram_<gene>.png         bar plot of the frequencies
#   aa_histogram_<gene>.txt         position index and frequency
#   aa_histogram_sum_<gene>.txt     the two column sums per position
#   aa_histogram_count_<gene>.txt   the filtered rows of the first table
#   aa_histogram_absent_<gene>.txt  the filtered rows of the second table
#
# NOTE(review): the leading 2 / 4 columns that are dropped are presumably
# id/metadata columns -- confirm against the script that produces the inputs.

library(ggplot2)

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 4) {
  stop(
    "expected 4 arguments: <mutations.by.id.file> <absent.aa.by.id.file> ",
    "<comma-separated genes> <outdir>",
    call. = FALSE
  )
}

mutations.by.id.file <- args[1]
absent.aa.by.id.file <- args[2]
# The trailing "" is a pseudo-gene: it selects every row that is not
# "unmatched", i.e. all genes combined.
genes <- c(strsplit(args[3], ",")[[1]], "")
outdir <- args[4]

if (!dir.exists(outdir)) {
  dir.create(outdir, recursive = TRUE)
}

print("---------------- read input ----------------")

mutations.by.id <- read.table(
  mutations.by.id.file,
  sep = "\t", fill = TRUE, header = TRUE, quote = ""
)
absent.aa.by.id <- read.table(
  absent.aa.by.id.file,
  sep = "\t", fill = TRUE, header = TRUE, quote = ""
)

# Without this check a missing column makes every filter below select zero
# rows, so the script would silently produce no output at all.
if (!("best_match" %in% names(mutations.by.id)) ||
      !("best_match" %in% names(absent.aa.by.id))) {
  stop("both input tables need a 'best_match' column", call. = FALSE)
}

# Region bars drawn below the x axis. Positions are hard-coded AA position
# boundaries; `text.x` is kept explicit because the FR1 label is not placed
# exactly at the midpoint of its segment.
regions <- data.frame(
  label  = c("FR1", "CDR1", "FR2", "CDR2", "FR3"),
  x      = c(0.5, 26.5, 38.5, 55.5, 65.5),
  xend   = c(26.5, 38.5, 55.5, 65.5, 104.5),
  y      = c(-0.05, -0.07, -0.05, -0.07, -0.05),
  colour = c("darkgreen", "darkblue", "darkgreen", "darkblue", "darkgreen"),
  text.x = c(13, 32.5, 47, 60.5, 85),
  text.y = c(-0.1, -0.15, -0.1, -0.15, -0.1),
  stringsAsFactors = FALSE
)

# Write `x` as a tab-separated file with header into the output directory.
write.tsv <- function(x, file.name) {
  write.table(
    x, file.path(outdir, file.name),
    sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE
  )
}

for (gene in genes) {

  if (gene == "") {
    keep.mutations <- !grepl("unmatched", mutations.by.id$best_match)
    keep.absent <- !grepl("unmatched", absent.aa.by.id$best_match)
  } else {
    gene.pattern <- paste0("^", gene)
    keep.mutations <- grepl(gene.pattern, mutations.by.id$best_match)
    keep.absent <- grepl(gene.pattern, absent.aa.by.id$best_match)
  }
  mutations.by.id.gene <- mutations.by.id[keep.mutations, ]
  absent.aa.by.id.gene <- absent.aa.by.id[keep.absent, ]

  if (nrow(mutations.by.id.gene) == 0) {
    next
  }

  print(paste("nrow", gene, nrow(absent.aa.by.id.gene)))

  mutations.at.position <- colSums(mutations.by.id.gene[, -c(1, 2)])
  aa.at.position <- colSums(absent.aa.by.id.gene[, -c(1, 2, 3, 4)])

  # The division below recycles silently when one length is a multiple of
  # the other, so fail loudly if the two tables disagree on the positions.
  if (length(mutations.at.position) != length(aa.at.position)) {
    stop(
      "position column count differs between the input tables: ",
      length(mutations.at.position), " vs ", length(aa.at.position),
      call. = FALSE
    )
  }

  dat_freq <- mutations.at.position / aa.at.position
  # 0 / 0 (no amino acid observed at a position) yields NaN; report it as a
  # frequency of 0 instead of leaking NaN into the plot and the table.
  dat_freq[is.na(dat_freq)] <- 0
  positions <- seq_along(dat_freq)
  dat_dt <- data.frame(i = positions, freq = dat_freq)

  print("---------------- plot ----------------")

  m <- ggplot(dat_dt, aes(x = i, y = freq)) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    geom_bar(
      stat = "identity", colour = "black", fill = "darkgrey", alpha = 0.8
    ) +
    scale_x_continuous(breaks = positions, labels = positions)

  for (r in seq_len(nrow(regions))) {
    m <- m +
      annotate(
        "segment",
        x = regions$x[r], y = regions$y[r],
        xend = regions$xend[r], yend = regions$y[r],
        colour = regions$colour[r], size = 1
      ) +
      annotate(
        "text",
        x = regions$text.x[r], y = regions$text.y[r],
        label = regions$label[r]
      )
  }

  m <- m +
    expand_limits(y = c(-0.1, 1)) +
    xlab("AA position") +
    ylab("Frequency") +
    ggtitle(paste(gene, "AA mutation frequency"))

  print("---------------- write/print ----------------")

  png(
    filename = file.path(outdir, paste0("aa_histogram_", gene, ".png")),
    width = 1280, height = 720
  )
  # Close the device even when rendering fails so no half-open device leaks.
  tryCatch(print(m), finally = dev.off())

  dat.sums <- data.frame(
    index = seq_along(mutations.at.position),
    mutations.at.position = mutations.at.position,
    aa.at.position = aa.at.position
  )

  write.tsv(dat.sums, paste0("aa_histogram_sum_", gene, ".txt"))
  write.tsv(mutations.by.id.gene, paste0("aa_histogram_count_", gene, ".txt"))
  write.tsv(absent.aa.by.id.gene, paste0("aa_histogram_absent_", gene, ".txt"))
  write.tsv(dat_dt, paste0("aa_histogram_", gene, ".txt"))
}