Mercurial > repos > davidvanzessen > plotting_merged
comparison RScript.r @ 35:bd1116ba4ee1 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Fri, 08 Nov 2013 05:46:21 -0500 |
| parents | 3ec932d17ef3 |
| children | b3c97b26db60 |
comparison
equal
deleted
inserted
replaced
| 34:3ec932d17ef3 | 35:bd1116ba4ee1 |
|---|---|
| 5 inFile = args[1] | 5 inFile = args[1] |
| 6 outFile = args[2] | 6 outFile = args[2] |
| 7 outDir = args[3] | 7 outDir = args[3] |
| 8 | 8 |
| 9 if (!require("gridExtra")) { | 9 if (!require("gridExtra")) { |
| 10 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") | 10 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") |
| 11 } | 11 } |
| 12 library(gridExtra) | 12 library(gridExtra) |
| 13 if (!require("ggplot2")) { | 13 if (!require("ggplot2")) { |
| 14 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") | 14 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") |
| 15 } | 15 } |
| 16 require(ggplot2) | 16 require(ggplot2) |
| 17 if (!require("plyr")) { | 17 if (!require("plyr")) { |
| 18 install.packages("plyr", repos="http://cran.xl-mirror.nl/") | 18 install.packages("plyr", repos="http://cran.xl-mirror.nl/") |
| 19 } | 19 } |
| 20 require(plyr) | 20 require(plyr) |
| 21 | |
| 22 if (!("data.table" %in% rownames(installed.packages()))) { | |
| 23 install.packages("data.table", repos="http://cran.xl-mirror.nl/") | |
| 24 } | |
| 25 library(data.table) | |
| 26 | |
| 27 | 21 |
| 28 test = read.table(inFile, sep="\t", header=TRUE) | 22 test = read.table(inFile, sep="\t", header=TRUE) |
| 29 | 23 |
| 30 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) | 24 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) |
| 31 test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene) | 25 test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene) |
| 35 | 29 |
| 36 PROD = test[test$VDJ.Frame != "In-frame with stop codon" & test$VDJ.Frame != "Out-of-frame" & test$CDR3.Found.How != "NOT_FOUND" , ] | 30 PROD = test[test$VDJ.Frame != "In-frame with stop codon" & test$VDJ.Frame != "Out-of-frame" & test$CDR3.Found.How != "NOT_FOUND" , ] |
| 37 | 31 |
| 38 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ] | 32 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ] |
| 39 | 33 |
| 40 #PRODF = PROD[ -1] | 34 PRODF = PROD[ -1] |
| 41 | |
| 42 PRODF = PROD | |
| 43 | 35 |
| 44 #PRODF = unique(PRODF) | 36 #PRODF = unique(PRODF) |
| 45 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] | 37 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] |
| 46 | 38 |
| 47 PRODFV = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.V.Gene")]) | 39 |
| 40 uniqueCount = split(PRODF, f=PRODF[,"Sample"]) | |
| 41 | |
| 42 for(i in 1:length(uniqueCount)) { | |
| 43 dat = data.frame(uniqueCount[i]) | |
| 44 sample = paste(unique(dat[,15])) | |
| 45 uniqueCount[sample] = length(dat[,1]) | |
| 46 } | |
| 47 | |
| 48 PRODFV = ddply(PRODF, c("Sample", "Top.V.Gene"), function(x) summary(x$VDJCDR3)) | |
| 48 PRODFV$Length = as.numeric(PRODFV$Length) | 49 PRODFV$Length = as.numeric(PRODFV$Length) |
| 49 Total = 0 | 50 Total = 0 |
| 50 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 51 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
| 51 PRODFV = merge(PRODFV, Total, by.x='Sample', by.y='Sample', all.x=TRUE) | 52 PRODFV = merge(PRODFV, Total, by.x='Sample', by.y='Sample', all.x=TRUE) |
| 52 PRODFV = ddply(PRODFV, c("Sample", "Top.V.Gene"), summarise, relFreq= (Length*100 / Total)) | 53 PRODFV = ddply(PRODFV, c("Sample", "Top.V.Gene"), summarise, relFreq= (Length*100 / Total)) |
| 53 | 54 |
| 54 PRODFD = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.D.Gene")]) | 55 PRODFD = ddply(PRODF, c("Sample", "Top.D.Gene"), function(x) summary(x$VDJCDR3)) |
| 55 PRODFD$Length = as.numeric(PRODFD$Length) | 56 PRODFD$Length = as.numeric(PRODFD$Length) |
| 56 Total = 0 | 57 Total = 0 |
| 57 Total = ddply(PRODFD, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 58 Total = ddply(PRODFD, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
| 58 PRODFD = merge(PRODFD, Total, by.x='Sample', by.y='Sample', all.x=TRUE) | 59 PRODFD = merge(PRODFD, Total, by.x='Sample', by.y='Sample', all.x=TRUE) |
| 59 PRODFD = ddply(PRODFD, c("Sample", "Top.D.Gene"), summarise, relFreq= (Length*100 / Total)) | 60 PRODFD = ddply(PRODFD, c("Sample", "Top.D.Gene"), summarise, relFreq= (Length*100 / Total)) |
| 60 | 61 |
| 61 PRODFJ = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.J.Gene")]) | 62 PRODFJ = ddply(PRODF, c("Sample", "Top.J.Gene"), function(x) summary(x$VDJCDR3)) |
| 62 PRODFJ$Length = as.numeric(PRODFJ$Length) | 63 PRODFJ$Length = as.numeric(PRODFJ$Length) |
| 63 Total = 0 | 64 Total = 0 |
| 64 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 65 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
| 65 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE) | 66 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE) |
| 66 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) | 67 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) |
| 113 | 114 |
| 114 plotVD <- function(dat){ | 115 plotVD <- function(dat){ |
| 115 img = ggplot() + | 116 img = ggplot() + |
| 116 geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + | 117 geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + |
| 117 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 118 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
| 118 scale_fill_gradient(low="gold", high="blue", na.value="white", limits=c(0, maxVD)) + | 119 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
| 119 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + | 120 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + |
| 120 xlab("D genes") + | 121 xlab("D genes") + |
| 121 ylab("V Genes") | 122 ylab("V Genes") |
| 122 | 123 |
| 123 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) | 124 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) |
| 124 print(img) | 125 print(img) |
| 125 dev.off() | 126 dev.off() |
| 126 } | 127 } |
| 127 | 128 |
| 128 VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) | 129 |
| 129 | 130 VandDCount = ddply(PRODF, c("Top.V.Gene", "Top.D.Gene", "Sample"), function(x) summary(x$VDJCDR3)) |
| 130 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) | 131 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) |
| 131 | 132 |
| 132 completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) | 133 completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) |
| 133 completeVD$Length = as.numeric(completeVD$Length) | 134 completeVD$Length = as.numeric(completeVD$Length) |
| 134 completeVD$log = log(completeVD$Length) | 135 completeVD$log = log(completeVD$Length) |
| 135 maxVD = max(subset(completeVD, !is.na(completeVD$log), "log")) | |
| 136 completeVD = merge(completeVD, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) | 136 completeVD = merge(completeVD, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) |
| 137 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | 137 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) |
| 138 #completeVD$log[is.na(completeVD$log)] = 0 | 138 #completeVD$log[is.na(completeVD$log)] = 0 |
| 139 l = split(completeVD, f=completeVD[,"Sample"]) | 139 l = split(completeVD, f=completeVD[,"Sample"]) |
| 140 | 140 |
| 145 plotVJ <- function(dat){ | 145 plotVJ <- function(dat){ |
| 146 img = ggplot() + | 146 img = ggplot() + |
| 147 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + | 147 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + |
| 148 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 148 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
| 149 scale_fill_gradient(low="gold", high="blue", na.value="white") + | 149 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
| 150 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + | 150 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + |
| 151 xlab("J genes") + | 151 xlab("J genes") + |
| 152 ylab("V Genes") | 152 ylab("V Genes") |
| 153 | 153 |
| 154 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) | 154 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) |
| 155 print(img) | 155 print(img) |
| 156 dev.off() | 156 dev.off() |
| 157 } | 157 } |
| 158 | 158 |
| 159 VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) | 159 VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) |
| 160 #VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | |
| 161 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) | 160 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) |
| 162 | 161 |
| 163 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) | 162 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) |
| 164 completeVJ$Length = as.numeric(completeVJ$Length) | 163 completeVJ$Length = as.numeric(completeVJ$Length) |
| 165 completeVJ$log = log(completeVJ$Length) | 164 completeVJ$log = log(completeVJ$Length) |
| 172 plotDJ <- function(dat){ | 171 plotDJ <- function(dat){ |
| 173 img = ggplot() + | 172 img = ggplot() + |
| 174 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=log)) + | 173 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=log)) + |
| 175 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | 174 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
| 176 scale_fill_gradient(low="gold", high="blue", na.value="white") + | 175 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
| 177 ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + | 176 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + |
| 178 xlab("J genes") + | 177 xlab("J genes") + |
| 179 ylab("D Genes") | 178 ylab("D Genes") |
| 180 | 179 |
| 181 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) | 180 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) |
| 182 print(img) | 181 print(img) |
| 183 dev.off() | 182 dev.off() |
| 184 } | 183 } |
| 185 | 184 |
| 186 DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) | 185 DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) |
| 187 #DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | |
| 188 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) | 186 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) |
| 189 | 187 |
| 190 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) | 188 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) |
| 191 completeDJ$Length = as.numeric(completeDJ$Length) | 189 completeDJ$Length = as.numeric(completeDJ$Length) |
| 192 completeDJ$log = log(completeDJ$Length) | 190 completeDJ$log = log(completeDJ$Length) |
