annotate pattern_plots.r @ 124:4a93146f87aa draft

Uploaded
author davidvanzessen
date Mon, 22 Aug 2016 09:11:17 -0400
parents 0453ea4d9f14
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
123
0453ea4d9f14 Uploaded
davidvanzessen
parents:
diff changeset
1 library(ggplot2)
0453ea4d9f14 Uploaded
davidvanzessen
parents:
diff changeset
2 library(reshape2)
124
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
3 library(scales)
123
0453ea4d9f14 Uploaded
davidvanzessen
parents:
diff changeset
4
0453ea4d9f14 Uploaded
davidvanzessen
parents:
diff changeset
5 args <- commandArgs(trailingOnly = TRUE)
0453ea4d9f14 Uploaded
davidvanzessen
parents:
diff changeset
6
0453ea4d9f14 Uploaded
davidvanzessen
parents:
diff changeset
7 input.file = args[1] #the data that gets turned into the "SHM overview" table in the html report "data_sum.txt"
124
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
8
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
9 plot1.path = args[2]
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
10 plot1.png = paste(plot1.path, ".png", sep="")
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
11 plot1.txt = paste(plot1.path, ".txt", sep="")
123
0453ea4d9f14 Uploaded
davidvanzessen
parents:
diff changeset
12
124
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
13 plot2.path = args[3]
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
14 plot2.png = paste(plot2.path, ".png", sep="")
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
15 plot2.txt = paste(plot2.path, ".txt", sep="")
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
16
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
17 plot3.path = args[4]
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
18 plot3.png = paste(plot3.path, ".png", sep="")
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
19 plot3.txt = paste(plot3.path, ".txt", sep="")
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
20
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
21 dat = read.table(input.file, header=F, sep=",", quote="", stringsAsFactors=F, fill=T, row.names=1)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
22
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
23
123
0453ea4d9f14 Uploaded
davidvanzessen
parents:
diff changeset
24
0453ea4d9f14 Uploaded
davidvanzessen
parents:
diff changeset
25 classes = c("ca", "ca1", "ca2", "cg", "cg1", "cg2", "cg3", "cg4", "cm")
0453ea4d9f14 Uploaded
davidvanzessen
parents:
diff changeset
26 xyz = c("x", "y", "z")
124
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
27 new.names = c(paste(rep(classes, each=3), xyz, sep="."), paste("un", xyz, sep="."), paste("all", xyz, sep="."))
123
0453ea4d9f14 Uploaded
davidvanzessen
parents:
diff changeset
28
124
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
29 names(dat) = new.names
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
30
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
31 dat["RGYW.WRCY",] = colSums(dat[c(13,14),])
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
32 dat["TW.WA",] = colSums(dat[c(15,16),])
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
33
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
34 data1 = dat[c("RGYW.WRCY", "TW.WA"),]
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
35
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
36 data1 = data1[,names(data1)[grepl("\\.z", names(data1))]] #keep only the columns whose name contains a literal ".z" (dot escaped so it is not a regex wildcard)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
37 names(data1) = gsub("\\..*", "", names(data1))
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
38
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
39 data1 = melt(t(data1))
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
40
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
41 names(data1) = c("Class", "Type", "value")
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
42
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
43 write.table(data1, plot1.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
44
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
45 p = ggplot(data1, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge") + ylab("% of mutations") + guides(fill=guide_legend(title=NULL))
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
46 png(filename=plot1.png)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
47 print(p)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
48 dev.off()
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
49
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
50 data2 = dat[5:8,]
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
51
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
52 data2["sum",] = colSums(data2)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
53
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
54 data2 = data2[,names(data2)[grepl("\\.x", names(data2))]]
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
55 names(data2) = gsub("\\.x", "", names(data2)) #strip the literal ".x" from the column names (dot escaped so it is not a regex wildcard)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
56
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
57 data2["A/T",] = round(colSums(data2[3:4,]) / data2["sum",] * 100, 1)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
58 data2["A/T",is.nan(unlist(data2["A/T",]))] = 0
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
59
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
60 data2["G/C transversions",] = round(data2[2,] / data2["sum",] * 100, 1)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
61 data2["G/C transitions",] = round(data2[1,] / data2["sum",] * 100, 1)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
62
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
63
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
64 data2["G/C transversions",is.nan(unlist(data2["G/C transversions",]))] = 0
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
65 data2["G/C transversions",is.infinite(unlist(data2["G/C transversions",]))] = 0
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
66 data2["G/C transitions",is.nan(unlist(data2["G/C transitions",]))] = 0
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
67 data2["G/C transitions",is.infinite(unlist(data2["G/C transitions",]))] = 0
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
68
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
69 data2 = melt(t(data2[6:8,]))
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
70
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
71 names(data2) = c("Class", "Type", "value")
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
72
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
73 write.table(data2, plot2.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
74
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
75 p = ggplot(data2, aes(x=Class, y=value, fill=Type)) + geom_bar(position="fill", stat="identity") + scale_y_continuous(labels=percent_format()) + guides(fill=guide_legend(title=NULL)) + ylab("% of mutations")
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
76 png(filename=plot2.png)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
77 print(p)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
78 dev.off()
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
79
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
80 data3 = dat[c(5, 6, 8, 17:20),]
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
81 data3 = data3[,names(data3)[grepl("\\.x", names(data3))]]
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
82 names(data3) = gsub("\\.x", "", names(data3)) #strip the literal ".x" from the column names (dot escaped so it is not a regex wildcard)
123
0453ea4d9f14 Uploaded
davidvanzessen
parents:
diff changeset
83
124
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
84 data3["G/C transitions",] = round(data3[1,] / (data3[5,] + data3[7,]) * 100, 1)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
85
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
86 data3["G/C transversions",] = round(data3[2,] / (data3[5,] + data3[7,]) * 100, 1)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
87
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
88 data3["A/T",] = round(data3[3,] / (data3[4,] + data3[6,]) * 100, 1)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
89
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
90 data3["G/C transitions",is.nan(unlist(data3["G/C transitions",]))] = 0
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
91 data3["G/C transitions",is.infinite(unlist(data3["G/C transitions",]))] = 0
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
92
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
93 data3["G/C transversions",is.nan(unlist(data3["G/C transversions",]))] = 0
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
94 data3["G/C transversions",is.infinite(unlist(data3["G/C transversions",]))] = 0
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
95
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
96 data3["A/T",is.nan(unlist(data3["A/T",]))] = 0
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
97 data3["A/T",is.infinite(unlist(data3["A/T",]))] = 0
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
98
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
99 data3 = melt(t(data3[8:10,]))
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
100 names(data3) = c("Class", "Type", "value")
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
101
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
102 write.table(data3, plot3.txt, quote=F, sep="\t", na="", row.names=F, col.names=T)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
103
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
104 p = ggplot(data3, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge") + ylab("% of nucleotides") + guides(fill=guide_legend(title=NULL))
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
105 png(filename=plot3.png)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
106 print(p)
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
107 dev.off()
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
108
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
109
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
110
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
111
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
112
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
113
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
114
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
115
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
116
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
117
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
118
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
119
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
120
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
121
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
122
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
123
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
124
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
125
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
126
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
127
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
128
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
129
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
130
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
131
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
132
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
133
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
134
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
135
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
136
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
137
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
138
4a93146f87aa Uploaded
davidvanzessen
parents: 123
diff changeset
139