Mercurial > repos > davidvanzessen > plotting_merged
comparison RScript.r @ 38:6e490e056fc4 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Thu, 14 Nov 2013 06:57:35 -0500 |
| parents | 16fe6233f90e |
| children | a9ad03d52680 |
comparison
equal
deleted
inserted
replaced
| 37:16fe6233f90e | 38:6e490e056fc4 |
|---|---|
| 23 install.packages("data.table", repos="http://cran.xl-mirror.nl/") | 23 install.packages("data.table", repos="http://cran.xl-mirror.nl/") |
| 24 } | 24 } |
| 25 library(data.table) | 25 library(data.table) |
| 26 | 26 |
| 27 | 27 |
| 28 test = read.table(inFile, sep="\t", header=TRUE) | 28 test = read.table(inFile, sep="\t", header=TRUE, fill=T) |
| 29 | 29 |
| 30 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) | 30 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) |
| 31 test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene) | 31 test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene) |
| 32 test$Top.J.Gene = gsub("[*]([0-9]+)", "", test$Top.J.Gene) | 32 test$Top.J.Gene = gsub("[*]([0-9]+)", "", test$Top.J.Gene) |
| 33 | 33 |
| 108 | 108 |
| 109 png("JPlot.png",width = 800, height = 600) | 109 png("JPlot.png",width = 800, height = 600) |
| 110 pJ | 110 pJ |
| 111 dev.off(); | 111 dev.off(); |
| 112 | 112 |
| 113 revVchain = Vchain | |
| 114 revDchain = Dchain | |
| 115 revVchain$chr.orderV = rev(revVchain$chr.orderV) | |
| 116 revDchain$chr.orderD = rev(revDchain$chr.orderD) | |
| 113 | 117 |
| 114 plotVD <- function(dat){ | 118 plotVD <- function(dat){ |
| 115 img = ggplot() + | 119 img = ggplot() + |
| 116 geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + | 120 geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=relLength)) + |
| 117 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 121 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
| 118 scale_fill_gradient(low="gold", high="blue", na.value="white") + | 122 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
| 119 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + | 123 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + |
| 120 xlab("D genes") + | 124 xlab("D genes") + |
| 121 ylab("V Genes") | 125 ylab("V Genes") |
| 122 | 126 |
| 123 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) | 127 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) |
| 124 print(img) | 128 print(img) |
| 125 dev.off() | 129 dev.off() |
| 126 } | 130 } |
| 127 | 131 |
| 128 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) | 132 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) |
| 133 | |
| 134 VandDCount$l = log(VandDCount$Length) | |
| 135 maxVD = data.frame(data.table(VandDCount)[, list(max=max(l)), by=c("Sample")]) | |
| 136 VandDCount = merge(VandDCount, maxVD, by.x="Sample", by.y="Sample", all.x=T) | |
| 137 VandDCount$relLength = VandDCount$l / VandDCount$max | |
| 138 | |
| 129 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) | 139 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) |
| 130 | 140 |
| 131 completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) | 141 completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) |
| 132 completeVD$Length = as.numeric(completeVD$Length) | 142 completeVD = merge(completeVD, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) |
| 133 completeVD$log = log(completeVD$Length) | |
| 134 completeVD = merge(completeVD, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) | |
| 135 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | 143 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) |
| 136 #completeVD$log[is.na(completeVD$log)] = 0 | 144 VDList = split(completeVD, f=completeVD[,"Sample"]) |
| 137 l = split(completeVD, f=completeVD[,"Sample"]) | 145 |
| 138 | 146 lapply(VDList, FUN=plotVD) |
| 139 lapply(l, FUN=plotVD) | |
| 140 | 147 |
| 141 | 148 |
| 142 | 149 |
| 143 plotVJ <- function(dat){ | 150 plotVJ <- function(dat){ |
| 144 img = ggplot() + | 151 img = ggplot() + |
| 145 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + | 152 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=relLength)) + |
| 146 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 153 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
| 147 scale_fill_gradient(low="gold", high="blue", na.value="white") + | 154 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
| 148 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + | 155 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + |
| 149 xlab("J genes") + | 156 xlab("J genes") + |
| 150 ylab("V Genes") | 157 ylab("V Genes") |
| 151 | 158 |
| 152 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) | 159 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) |
| 153 print(img) | 160 print(img) |
| 154 dev.off() | 161 dev.off() |
| 155 } | 162 } |
| 156 | 163 |
| 157 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) | 164 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) |
| 158 #VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | 165 |
| 166 VandJCount$l = log(VandJCount$Length) | |
| 167 maxVJ = data.frame(data.table(VandJCount)[, list(max=max(l)), by=c("Sample")]) | |
| 168 VandJCount = merge(VandJCount, maxVJ, by.x="Sample", by.y="Sample", all.x=T) | |
| 169 VandJCount$relLength = VandJCount$l / VandJCount$max | |
| 170 | |
| 159 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) | 171 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) |
| 160 | 172 |
| 161 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) | 173 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) |
| 162 completeVJ$Length = as.numeric(completeVJ$Length) | 174 completeVJ = merge(completeVJ, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) |
| 163 completeVJ$log = log(completeVJ$Length) | |
| 164 completeVJ = merge(completeVJ, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) | |
| 165 completeVJ = merge(completeVJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) | 175 completeVJ = merge(completeVJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) |
| 166 #completeVJ$log[is.na(completeVJ$log)] = 0 | 176 VJList = split(completeVJ, f=completeVJ[,"Sample"]) |
| 167 l = split(completeVJ, f=completeVJ[,"Sample"]) | 177 lapply(VJList, FUN=plotVJ) |
| 168 lapply(l, FUN=plotVJ) | |
| 169 | 178 |
| 170 plotDJ <- function(dat){ | 179 plotDJ <- function(dat){ |
| 171 img = ggplot() + | 180 img = ggplot() + |
| 172 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=log)) + | 181 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=relLength)) + |
| 173 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 182 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
| 174 scale_fill_gradient(low="gold", high="blue", na.value="white") + | 183 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
| 175 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + | 184 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + |
| 176 xlab("J genes") + | 185 xlab("J genes") + |
| 177 ylab("D Genes") | 186 ylab("D Genes") |
| 178 | 187 |
| 179 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) | 188 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) |
| 180 print(img) | 189 print(img) |
| 181 dev.off() | 190 dev.off() |
| 182 } | 191 } |
| 183 | 192 |
| 184 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) | 193 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) |
| 185 #DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | 194 |
| 195 DandJCount$l = log(DandJCount$Length) | |
| 196 maxDJ = data.frame(data.table(DandJCount)[, list(max=max(l)), by=c("Sample")]) | |
| 197 DandJCount = merge(DandJCount, maxDJ, by.x="Sample", by.y="Sample", all.x=T) | |
| 198 DandJCount$relLength = DandJCount$l / DandJCount$max | |
| 199 | |
| 186 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) | 200 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) |
| 187 | 201 |
| 188 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) | 202 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) |
| 189 completeDJ$Length = as.numeric(completeDJ$Length) | 203 completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) |
| 190 completeDJ$log = log(completeDJ$Length) | |
| 191 completeDJ = merge(completeDJ, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | |
| 192 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) | 204 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) |
| 193 #completeDJ$log[is.na(completeDJ$log)] = 0 | 205 DJList = split(completeDJ, f=completeDJ[,"Sample"]) |
| 194 l = split(completeDJ, f=completeDJ[,"Sample"]) | 206 lapply(DJList, FUN=plotDJ) |
| 195 lapply(l, FUN=plotDJ) | |
| 196 | 207 |
| 197 | 208 |
| 198 sampleFile <- file("samples.txt") | 209 sampleFile <- file("samples.txt") |
| 199 un = unique(test$Sample) | 210 un = unique(test$Sample) |
| 200 un = paste(un, sep="\n") | 211 un = paste(un, sep="\n") |
