Mercurial > repos > davidvanzessen > plotting_merged
comparison RScript.r @ 38:6e490e056fc4 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 14 Nov 2013 06:57:35 -0500 |
parents | 16fe6233f90e |
children | a9ad03d52680 |
comparison
equal
deleted
inserted
replaced
37:16fe6233f90e | 38:6e490e056fc4 |
---|---|
23 install.packages("data.table", repos="http://cran.xl-mirror.nl/") | 23 install.packages("data.table", repos="http://cran.xl-mirror.nl/") |
24 } | 24 } |
25 library(data.table) | 25 library(data.table) |
26 | 26 |
27 | 27 |
28 test = read.table(inFile, sep="\t", header=TRUE) | 28 test = read.table(inFile, sep="\t", header=TRUE, fill=T) |
29 | 29 |
30 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) | 30 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) |
31 test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene) | 31 test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene) |
32 test$Top.J.Gene = gsub("[*]([0-9]+)", "", test$Top.J.Gene) | 32 test$Top.J.Gene = gsub("[*]([0-9]+)", "", test$Top.J.Gene) |
33 | 33 |
108 | 108 |
109 png("JPlot.png",width = 800, height = 600) | 109 png("JPlot.png",width = 800, height = 600) |
110 pJ | 110 pJ |
111 dev.off(); | 111 dev.off(); |
112 | 112 |
113 revVchain = Vchain | |
114 revDchain = Dchain | |
115 revVchain$chr.orderV = rev(revVchain$chr.orderV) | |
116 revDchain$chr.orderD = rev(revDchain$chr.orderD) | |
113 | 117 |
114 plotVD <- function(dat){ | 118 plotVD <- function(dat){ |
115 img = ggplot() + | 119 img = ggplot() + |
116 geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + | 120 geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=relLength)) + |
117 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 121 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
118 scale_fill_gradient(low="gold", high="blue", na.value="white") + | 122 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
119 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + | 123 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + |
120 xlab("D genes") + | 124 xlab("D genes") + |
121 ylab("V Genes") | 125 ylab("V Genes") |
122 | 126 |
123 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) | 127 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) |
124 print(img) | 128 print(img) |
125 dev.off() | 129 dev.off() |
126 } | 130 } |
127 | 131 |
128 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) | 132 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) |
133 | |
134 VandDCount$l = log(VandDCount$Length) | |
135 maxVD = data.frame(data.table(VandDCount)[, list(max=max(l)), by=c("Sample")]) | |
136 VandDCount = merge(VandDCount, maxVD, by.x="Sample", by.y="Sample", all.x=T) | |
137 VandDCount$relLength = VandDCount$l / VandDCount$max | |
138 | |
129 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) | 139 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) |
130 | 140 |
131 completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) | 141 completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) |
132 completeVD$Length = as.numeric(completeVD$Length) | 142 completeVD = merge(completeVD, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) |
133 completeVD$log = log(completeVD$Length) | |
134 completeVD = merge(completeVD, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) | |
135 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | 143 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) |
136 #completeVD$log[is.na(completeVD$log)] = 0 | 144 VDList = split(completeVD, f=completeVD[,"Sample"]) |
137 l = split(completeVD, f=completeVD[,"Sample"]) | 145 |
138 | 146 lapply(VDList, FUN=plotVD) |
139 lapply(l, FUN=plotVD) | |
140 | 147 |
141 | 148 |
142 | 149 |
143 plotVJ <- function(dat){ | 150 plotVJ <- function(dat){ |
144 img = ggplot() + | 151 img = ggplot() + |
145 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + | 152 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=relLength)) + |
146 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 153 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
147 scale_fill_gradient(low="gold", high="blue", na.value="white") + | 154 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
148 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + | 155 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + |
149 xlab("J genes") + | 156 xlab("J genes") + |
150 ylab("V Genes") | 157 ylab("V Genes") |
151 | 158 |
152 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) | 159 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) |
153 print(img) | 160 print(img) |
154 dev.off() | 161 dev.off() |
155 } | 162 } |
156 | 163 |
157 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) | 164 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) |
158 #VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | 165 |
166 VandJCount$l = log(VandJCount$Length) | |
167 maxVJ = data.frame(data.table(VandJCount)[, list(max=max(l)), by=c("Sample")]) | |
168 VandJCount = merge(VandJCount, maxVJ, by.x="Sample", by.y="Sample", all.x=T) | |
169 VandJCount$relLength = VandJCount$l / VandJCount$max | |
170 | |
159 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) | 171 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) |
160 | 172 |
161 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) | 173 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) |
162 completeVJ$Length = as.numeric(completeVJ$Length) | 174 completeVJ = merge(completeVJ, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) |
163 completeVJ$log = log(completeVJ$Length) | |
164 completeVJ = merge(completeVJ, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) | |
165 completeVJ = merge(completeVJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) | 175 completeVJ = merge(completeVJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) |
166 #completeVJ$log[is.na(completeVJ$log)] = 0 | 176 VJList = split(completeVJ, f=completeVJ[,"Sample"]) |
167 l = split(completeVJ, f=completeVJ[,"Sample"]) | 177 lapply(VJList, FUN=plotVJ) |
168 lapply(l, FUN=plotVJ) | |
169 | 178 |
170 plotDJ <- function(dat){ | 179 plotDJ <- function(dat){ |
171 img = ggplot() + | 180 img = ggplot() + |
172 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=log)) + | 181 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=relLength)) + |
173 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 182 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
174 scale_fill_gradient(low="gold", high="blue", na.value="white") + | 183 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
175 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + | 184 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + |
176 xlab("J genes") + | 185 xlab("J genes") + |
177 ylab("D Genes") | 186 ylab("D Genes") |
178 | 187 |
179 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) | 188 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) |
180 print(img) | 189 print(img) |
181 dev.off() | 190 dev.off() |
182 } | 191 } |
183 | 192 |
184 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) | 193 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) |
185 #DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | 194 |
195 DandJCount$l = log(DandJCount$Length) | |
196 maxDJ = data.frame(data.table(DandJCount)[, list(max=max(l)), by=c("Sample")]) | |
197 DandJCount = merge(DandJCount, maxDJ, by.x="Sample", by.y="Sample", all.x=T) | |
198 DandJCount$relLength = DandJCount$l / DandJCount$max | |
199 | |
186 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) | 200 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) |
187 | 201 |
188 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) | 202 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) |
189 completeDJ$Length = as.numeric(completeDJ$Length) | 203 completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) |
190 completeDJ$log = log(completeDJ$Length) | |
191 completeDJ = merge(completeDJ, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | |
192 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) | 204 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) |
193 #completeDJ$log[is.na(completeDJ$log)] = 0 | 205 DJList = split(completeDJ, f=completeDJ[,"Sample"]) |
194 l = split(completeDJ, f=completeDJ[,"Sample"]) | 206 lapply(DJList, FUN=plotDJ) |
195 lapply(l, FUN=plotDJ) | |
196 | 207 |
197 | 208 |
198 sampleFile <- file("samples.txt") | 209 sampleFile <- file("samples.txt") |
199 un = unique(test$Sample) | 210 un = unique(test$Sample) |
200 un = paste(un, sep="\n") | 211 un = paste(un, sep="\n") |