Mercurial > repos > davidvanzessen > plotting_merged
comparison RScript.r @ 36:b3c97b26db60 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 08 Nov 2013 06:51:37 -0500 |
parents | bd1116ba4ee1 |
children | 16fe6233f90e |
comparison
equal
deleted
inserted
replaced
35:bd1116ba4ee1 | 36:b3c97b26db60 |
---|---|
4 | 4 |
5 inFile = args[1] | 5 inFile = args[1] |
6 outFile = args[2] | 6 outFile = args[2] |
7 outDir = args[3] | 7 outDir = args[3] |
8 | 8 |
9 if (!require("gridExtra")) { | 9 if (!("gridExtra" %in% rownames(installed.packages()))) { |
10 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") | 10 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") |
11 } | 11 } |
12 library(gridExtra) | 12 library(gridExtra) |
13 if (!require("ggplot2")) { | 13 if (!("ggplot2" %in% rownames(installed.packages()))) { |
14 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") | 14 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") |
15 } | 15 } |
16 require(ggplot2) | 16 require(ggplot2) |
17 if (!require("plyr")) { | 17 if (!("plyr" %in% rownames(installed.packages()))) { |
18 install.packages("plyr", repos="http://cran.xl-mirror.nl/") | 18 install.packages("plyr", repos="http://cran.xl-mirror.nl/") |
19 } | 19 } |
20 require(plyr) | 20 require(plyr) |
21 | |
22 if (!("data.table" %in% rownames(installed.packages()))) { | |
23 install.packages("data.table", repos="http://cran.xl-mirror.nl/") | |
24 } | |
25 library(data.table) | |
26 | |
21 | 27 |
22 test = read.table(inFile, sep="\t", header=TRUE) | 28 test = read.table(inFile, sep="\t", header=TRUE) |
23 | 29 |
24 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) | 30 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) |
25 test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene) | 31 test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene) |
29 | 35 |
30 PROD = test[test$VDJ.Frame != "In-frame with stop codon" & test$VDJ.Frame != "Out-of-frame" & test$CDR3.Found.How != "NOT_FOUND" , ] | 36 PROD = test[test$VDJ.Frame != "In-frame with stop codon" & test$VDJ.Frame != "Out-of-frame" & test$CDR3.Found.How != "NOT_FOUND" , ] |
31 | 37 |
32 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ] | 38 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ] |
33 | 39 |
34 PRODF = PROD[ -1] | 40 #PRODF = PROD[ -1] |
41 | |
42 PRODF = PROD | |
35 | 43 |
36 #PRODF = unique(PRODF) | 44 #PRODF = unique(PRODF) |
37 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] | 45 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] |
38 | 46 |
39 | 47 PRODFV = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.V.Gene")]) |
40 uniqueCount = split(PRODF, f=PRODF[,"Sample"]) | |
41 | |
42 for(i in 1:length(uniqueCount)) { | |
43 dat = data.frame(uniqueCount[i]) | |
44 sample = paste(unique(dat[,15])) | |
45 uniqueCount[sample] = length(dat[,1]) | |
46 } | |
47 | |
48 PRODFV = ddply(PRODF, c("Sample", "Top.V.Gene"), function(x) summary(x$VDJCDR3)) | |
49 PRODFV$Length = as.numeric(PRODFV$Length) | 48 PRODFV$Length = as.numeric(PRODFV$Length) |
50 Total = 0 | 49 Total = 0 |
51 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 50 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
52 PRODFV = merge(PRODFV, Total, by.x='Sample', by.y='Sample', all.x=TRUE) | 51 PRODFV = merge(PRODFV, Total, by.x='Sample', by.y='Sample', all.x=TRUE) |
53 PRODFV = ddply(PRODFV, c("Sample", "Top.V.Gene"), summarise, relFreq= (Length*100 / Total)) | 52 PRODFV = ddply(PRODFV, c("Sample", "Top.V.Gene"), summarise, relFreq= (Length*100 / Total)) |
54 | 53 |
55 PRODFD = ddply(PRODF, c("Sample", "Top.D.Gene"), function(x) summary(x$VDJCDR3)) | 54 PRODFD = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.D.Gene")]) |
56 PRODFD$Length = as.numeric(PRODFD$Length) | 55 PRODFD$Length = as.numeric(PRODFD$Length) |
57 Total = 0 | 56 Total = 0 |
58 Total = ddply(PRODFD, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 57 Total = ddply(PRODFD, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
59 PRODFD = merge(PRODFD, Total, by.x='Sample', by.y='Sample', all.x=TRUE) | 58 PRODFD = merge(PRODFD, Total, by.x='Sample', by.y='Sample', all.x=TRUE) |
60 PRODFD = ddply(PRODFD, c("Sample", "Top.D.Gene"), summarise, relFreq= (Length*100 / Total)) | 59 PRODFD = ddply(PRODFD, c("Sample", "Top.D.Gene"), summarise, relFreq= (Length*100 / Total)) |
61 | 60 |
62 PRODFJ = ddply(PRODF, c("Sample", "Top.J.Gene"), function(x) summary(x$VDJCDR3)) | 61 PRODFJ = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.J.Gene")]) |
63 PRODFJ$Length = as.numeric(PRODFJ$Length) | 62 PRODFJ$Length = as.numeric(PRODFJ$Length) |
64 Total = 0 | 63 Total = 0 |
65 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 64 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
66 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE) | 65 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE) |
67 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) | 66 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) |
114 | 113 |
115 plotVD <- function(dat){ | 114 plotVD <- function(dat){ |
116 img = ggplot() + | 115 img = ggplot() + |
117 geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + | 116 geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + |
118 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 117 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
119 scale_fill_gradient(low="gold", high="blue", na.value="white") + | 118 scale_fill_gradient(low="gold", high="blue", na.value="white", limits=c(0, maxVD)) + |
120 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + | 119 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + |
121 xlab("D genes") + | 120 xlab("D genes") + |
122 ylab("V Genes") | 121 ylab("V Genes") |
123 | 122 |
124 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) | 123 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) |
125 print(img) | 124 print(img) |
126 dev.off() | 125 dev.off() |
127 } | 126 } |
128 | 127 |
129 | 128 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) |
130 VandDCount = ddply(PRODF, c("Top.V.Gene", "Top.D.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | 129 |
130 | |
131 | |
132 | |
131 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) | 133 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) |
132 | 134 |
133 completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) | 135 completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) |
134 completeVD$Length = as.numeric(completeVD$Length) | 136 completeVD$Length = as.numeric(completeVD$Length) |
135 completeVD$log = log(completeVD$Length) | 137 completeVD$log = log(completeVD$Length) |
138 maxVD = max(subset(completeVD, !is.na(completeVD$log), "log")) | |
136 completeVD = merge(completeVD, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) | 139 completeVD = merge(completeVD, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) |
137 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | 140 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) |
138 #completeVD$log[is.na(completeVD$log)] = 0 | 141 #completeVD$log[is.na(completeVD$log)] = 0 |
139 l = split(completeVD, f=completeVD[,"Sample"]) | 142 l = split(completeVD, f=completeVD[,"Sample"]) |
140 | 143 |
145 plotVJ <- function(dat){ | 148 plotVJ <- function(dat){ |
146 img = ggplot() + | 149 img = ggplot() + |
147 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + | 150 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + |
148 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 151 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
149 scale_fill_gradient(low="gold", high="blue", na.value="white") + | 152 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
150 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + | 153 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + |
151 xlab("J genes") + | 154 xlab("J genes") + |
152 ylab("V Genes") | 155 ylab("V Genes") |
153 | 156 |
154 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) | 157 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) |
155 print(img) | 158 print(img) |
156 dev.off() | 159 dev.off() |
157 } | 160 } |
158 | 161 |
159 VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | 162 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) |
163 #VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | |
160 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) | 164 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) |
161 | 165 |
162 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) | 166 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) |
163 completeVJ$Length = as.numeric(completeVJ$Length) | 167 completeVJ$Length = as.numeric(completeVJ$Length) |
164 completeVJ$log = log(completeVJ$Length) | 168 completeVJ$log = log(completeVJ$Length) |
171 plotDJ <- function(dat){ | 175 plotDJ <- function(dat){ |
172 img = ggplot() + | 176 img = ggplot() + |
173 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=log)) + | 177 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=log)) + |
174 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 178 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
175 scale_fill_gradient(low="gold", high="blue", na.value="white") + | 179 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
176 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + | 180 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + |
177 xlab("J genes") + | 181 xlab("J genes") + |
178 ylab("D Genes") | 182 ylab("D Genes") |
179 | 183 |
180 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) | 184 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) |
181 print(img) | 185 print(img) |
182 dev.off() | 186 dev.off() |
183 } | 187 } |
184 | 188 |
185 DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | 189 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) |
190 #DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | |
186 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) | 191 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) |
187 | 192 |
188 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) | 193 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) |
189 completeDJ$Length = as.numeric(completeDJ$Length) | 194 completeDJ$Length = as.numeric(completeDJ$Length) |
190 completeDJ$log = log(completeDJ$Length) | 195 completeDJ$log = log(completeDJ$Length) |