Mercurial > repos > davidvanzessen > plotting_merged
changeset 32:548b3035bbd7 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 08 Nov 2013 05:29:29 -0500 |
parents | d26e8c208b5f |
children | 8d9be90792b5 |
files | RScript.r |
diffstat | 1 files changed, 121 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
--- a/RScript.r Fri Nov 08 05:29:20 2013 -0500 +++ b/RScript.r Fri Nov 08 05:29:29 2013 -0500 @@ -4,12 +4,19 @@ inFile = args[1] outFile = args[2] +outDir = args[3] -#install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") -library (gridExtra) -#install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") +if (!require("gridExtra")) { +install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") +} +library(gridExtra) +if (!require("ggplot2")) { +install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") +} require(ggplot2) -#install.packages("plyr", repos="http://cran.xl-mirror.nl/") +if (!require("plyr")) { +install.packages("plyr", repos="http://cran.xl-mirror.nl/") +} require(plyr) test = read.table(inFile, sep="\t", header=TRUE) @@ -26,12 +33,17 @@ PRODF = PROD[ -1] -#unique(PRODF[duplicated(PRODF),]) -#length(row.names(PRODF[duplicated(PRODF),])) +#PRODF = unique(PRODF) +PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] + + +uniqueCount = split(PRODF, f=PRODF[,"Sample"]) -#length(row.names(PRODF)) -PRODF = unique(PRODF) -#length(row.names(PRODF)) +for(i in 1:length(uniqueCount)) { + dat = data.frame(uniqueCount[i]) + sample = paste(unique(dat[,15])) + uniqueCount[sample] = length(dat[,1]) +} PRODFV = ddply(PRODF, c("Sample", "Top.V.Gene"), function(x) summary(x$VDJCDR3)) PRODFV$Length = as.numeric(PRODFV$Length) @@ -73,28 +85,118 @@ PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) close(tcJ) +setwd(outDir) + pV = ggplot(PRODFV) pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") -#pV + +png("VPlot.png",width = 1280, height = 720) +pV +dev.off(); pD = ggplot(PRODFD) pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") -#pD + +png("DPlot.png",width = 800, height = 600) +pD +dev.off(); pJ = ggplot(PRODFJ) pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) -pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") -#pJ +pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") + +png("JPlot.png",width = 800, height = 600) +pJ +dev.off(); + -#plotall = grid.arrange(pV, arrangeGrob(pD, pJ, ncol=2), ncol=1, widths=c(1,1.2)) -#plotall -#ggsave(outFile, dpi=125) +plotVD <- function(dat){ + img = ggplot() + + geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + + scale_fill_gradient(low="gold", high="blue", na.value="white") + + ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + + xlab("D genes") + + ylab("V Genes") + + png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) + print(img) + dev.off() +} + + +VandDCount = ddply(PRODF, c("Top.V.Gene", "Top.D.Gene", "Sample"), function(x) summary(x$VDJCDR3)) +cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) + +completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) +completeVD$Length = as.numeric(completeVD$Length) +completeVD$log = log(completeVD$Length) +completeVD = merge(completeVD, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) +completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) +#completeVD$log[is.na(completeVD$log)] = 0 +l = split(completeVD, f=completeVD[,"Sample"]) + +lapply(l, FUN=plotVD) + -png(outFile,width = 1920, height = 1200) -plotall = grid.arrange(pV, arrangeGrob(pD, pJ, ncol=2), ncol=1, widths=c(1,1.2)) -dev.off() +plotVJ <- function(dat){ + img = ggplot() + + geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + + scale_fill_gradient(low="gold", high="blue", na.value="white") + + ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + + xlab("J genes") + + ylab("V Genes") + + png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) + print(img) + dev.off() +} + +VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) +cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) + +completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) +completeVJ$Length = as.numeric(completeVJ$Length) +completeVJ$log = log(completeVJ$Length) +completeVJ = merge(completeVJ, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) +completeVJ = merge(completeVJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) +#completeVJ$log[is.na(completeVJ$log)] = 0 +l = split(completeVJ, f=completeVJ[,"Sample"]) +lapply(l, FUN=plotVJ) + +plotDJ <- function(dat){ + img = ggplot() + + geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=log)) + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + + scale_fill_gradient(low="gold", high="blue", na.value="white") + + ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + + xlab("J genes") + + ylab("D Genes") + + png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) + print(img) + dev.off() +} + +DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) +cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) + +completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) +completeDJ$Length = as.numeric(completeDJ$Length) +completeDJ$log = log(completeDJ$Length) +completeDJ = merge(completeDJ, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) +completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) +#completeDJ$log[is.na(completeDJ$log)] = 0 +l = split(completeDJ, f=completeDJ[,"Sample"]) +lapply(l, FUN=plotDJ) +sampleFile <- file("samples.txt") +un = unique(test$Sample) +un = paste(un, sep="\n") +writeLines(un, sampleFile) +close(sampleFile)