Mercurial > repos > davidvanzessen > mutation_analysis
comparison aa_histogram.r @ 107:01c9993865af draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 13 Jul 2016 08:04:13 -0400 |
parents | e4957ad476a2 |
children | 6add3e66f4fa |
comparison
equal
deleted
inserted
replaced
106:074ae1e30e8f | 107:01c9993865af |
---|---|
1 library(ggplot2) | 1 library(ggplot2) |
2 | 2 |
3 args <- commandArgs(trailingOnly = TRUE) | 3 args <- commandArgs(trailingOnly = TRUE) |
4 | 4 |
5 input = args[1] | 5 mutations.by.id.file = args[1] |
6 outfile = args[2] | 6 absent.aa.by.id.file = args[2] |
7 gene = args[3] | 7 genes = strsplit(args[3], ",")[[1]] |
| | 8 genes = c(genes, "") |
| | 9 outdir = args[4] |
| | 10 |
8 | 11 |
9 print("---------------- read input ----------------") | 12 print("---------------- read input ----------------") |
10 | 13 |
11 dat = read.table(input, sep="\t", fill=T, header=T, quote="") | 14 mutations.by.id = read.table(mutations.by.id.file, sep="\t", fill=T, header=T, quote="") |
| | 15 absent.aa.by.id = read.table(absent.aa.by.id.file, sep="\t", fill=T, header=T, quote="") |
12 | 16 |
13 print("---------------- as numeric ----------------") | 17 for(gene in genes){ |
| | 18 |
| | 19 if(gene == ""){ |
| | 20 mutations.by.id.gene = mutations.by.id[!grepl("unmatched", mutations.by.id$best_match),] |
| | 21 absent.aa.by.id.gene = absent.aa.by.id[!grepl("unmatched", absent.aa.by.id$best_match),] |
| | 22 } else { |
| | 23 mutations.by.id.gene = mutations.by.id[grepl(paste("^", gene, sep=""), mutations.by.id$best_match),] |
| | 24 absent.aa.by.id.gene = absent.aa.by.id[grepl(paste("^", gene, sep=""), absent.aa.by.id$best_match),] |
| | 25 } |
| | 26 if(nrow(mutations.by.id.gene) == 0){ |
| | 27 next |
| | 28 } |
14 | 29 |
15 mutations.at.position = as.numeric(dat[1,]) | 30 mutations.at.position = colSums(mutations.by.id.gene[,-c(1,2)]) |
16 aa.at.position = as.numeric(dat[2,]) | 31 aa.at.position = colSums(absent.aa.by.id.gene[,-c(1,2,3,4)]) |
17 | 32 |
18 print("---------------- freq data.frame ----------------") | 33 dat_freq = mutations.at.position / aa.at.position |
| | 34 dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq) |
19 | 35 |
20 dat_freq = mutations.at.position / aa.at.position | 36 print("---------------- plot ----------------") |
21 dat_dt = data.frame(i=1:length(dat_freq), freq=dat_freq) | |
22 | 37 |
23 print("---------------- plot ----------------") | 38 m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
| | 39 m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=1:length(dat_freq), labels=1:length(dat_freq)) |
| | 40 m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1") |
| | 41 m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1") |
| | 42 m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2") |
| | 43 m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2") |
| | 44 m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3") |
| | 45 m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(paste(gene, "AA mutation frequency")) |
24 | 46 |
25 m = ggplot(dat_dt, aes(x=i, y=freq)) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 47 print("---------------- write/print ----------------") |
26 m = m + geom_bar(stat="identity", colour = "black", fill = "darkgrey", alpha=0.8) + scale_x_continuous(breaks=1:length(dat_freq), labels=1:length(dat_freq)) | |
27 m = m + annotate("segment", x = 0.5, y = -0.05, xend=26.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 13, y = -0.1, label="FR1") | |
28 m = m + annotate("segment", x = 26.5, y = -0.07, xend=38.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 32.5, y = -0.15, label="CDR1") | |
29 m = m + annotate("segment", x = 38.5, y = -0.05, xend=55.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 47, y = -0.1, label="FR2") | |
30 m = m + annotate("segment", x = 55.5, y = -0.07, xend=65.5, yend=-0.07, colour="darkblue", size=1) + annotate("text", x = 60.5, y = -0.15, label="CDR2") | |
31 m = m + annotate("segment", x = 65.5, y = -0.05, xend=104.5, yend=-0.05, colour="darkgreen", size=1) + annotate("text", x = 85, y = -0.1, label="FR3") | |
32 m = m + expand_limits(y=c(-0.1,1)) + xlab("AA position") + ylab("Frequency") + ggtitle(paste(gene, "AA mutation frequency")) | |
33 | 48 |
34 print("---------------- write/print ----------------") | 49 write.table(dat_dt, paste(outdir, "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
35 | 50 png(filename=paste(outdir, "/aa_histogram_", gene, ".png", sep=""), width=1280, height=720) |
36 write.table(dat_dt, paste(dirname(outfile), "/aa_histogram_", gene, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 51 print(m) |
37 png(filename=outfile, width=1280, height=720) | 52 dev.off() |
38 print(m) | 53 } |
39 dev.off() |