Mercurial > repos > davidvanzessen > plotting_merged
changeset 35:bd1116ba4ee1 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 08 Nov 2013 05:46:21 -0500 |
parents | 3ec932d17ef3 |
children | b3c97b26db60 |
files | RScript.r |
diffstat | 1 files changed, 23 insertions(+), 25 deletions(-) [+] |
line wrap: on
line diff
--- a/RScript.r Fri Nov 08 05:44:12 2013 -0500 +++ b/RScript.r Fri Nov 08 05:46:21 2013 -0500 @@ -7,24 +7,18 @@ outDir = args[3] if (!require("gridExtra")) { - install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") +install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") } library(gridExtra) if (!require("ggplot2")) { - install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") +install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") } require(ggplot2) if (!require("plyr")) { - install.packages("plyr", repos="http://cran.xl-mirror.nl/") +install.packages("plyr", repos="http://cran.xl-mirror.nl/") } require(plyr) -if (!("data.table" %in% rownames(installed.packages()))) { - install.packages("data.table", repos="http://cran.xl-mirror.nl/") -} -library(data.table) - - test = read.table(inFile, sep="\t", header=TRUE) test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) @@ -37,28 +31,35 @@ NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ] -#PRODF = PROD[ -1] - -PRODF = PROD +PRODF = PROD[ -1] #PRODF = unique(PRODF) PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] -PRODFV = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.V.Gene")]) + +uniqueCount = split(PRODF, f=PRODF[,"Sample"]) + +for(i in 1:length(uniqueCount)) { + dat = data.frame(uniqueCount[i]) + sample = paste(unique(dat[,15])) + uniqueCount[sample] = length(dat[,1]) +} + +PRODFV = ddply(PRODF, c("Sample", "Top.V.Gene"), function(x) summary(x$VDJCDR3)) PRODFV$Length = as.numeric(PRODFV$Length) Total = 0 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) PRODFV = merge(PRODFV, Total, by.x='Sample', by.y='Sample', all.x=TRUE) PRODFV = ddply(PRODFV, c("Sample", "Top.V.Gene"), summarise, relFreq= (Length*100 / Total)) -PRODFD = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.D.Gene")]) +PRODFD = ddply(PRODF, c("Sample", "Top.D.Gene"), function(x) summary(x$VDJCDR3)) PRODFD$Length = as.numeric(PRODFD$Length) Total = 0 Total = ddply(PRODFD, .(Sample), function(x) data.frame(Total = sum(x$Length))) PRODFD = merge(PRODFD, Total, by.x='Sample', by.y='Sample', all.x=TRUE) PRODFD = ddply(PRODFD, c("Sample", "Top.D.Gene"), summarise, relFreq= (Length*100 / Total)) -PRODFJ = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Sample", "Top.J.Gene")]) +PRODFJ = ddply(PRODF, c("Sample", "Top.J.Gene"), function(x) summary(x$VDJCDR3)) PRODFJ$Length = as.numeric(PRODFJ$Length) Total = 0 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) @@ -115,8 +116,8 @@ img = ggplot() + geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + - scale_fill_gradient(low="gold", high="blue", na.value="white", limits=c(0, maxVD)) + - ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + + scale_fill_gradient(low="gold", high="blue", na.value="white") + + ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + xlab("D genes") + ylab("V Genes") @@ -125,14 +126,13 @@ dev.off() } -VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) +VandDCount = ddply(PRODF, c("Top.V.Gene", "Top.D.Gene", "Sample"), function(x) summary(x$VDJCDR3)) cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) completeVD$Length = as.numeric(completeVD$Length) completeVD$log = log(completeVD$Length) -maxVD = max(subset(completeVD, !is.na(completeVD$log), "log")) completeVD = merge(completeVD, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) #completeVD$log[is.na(completeVD$log)] = 0 @@ -147,7 +147,7 @@ geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + scale_fill_gradient(low="gold", high="blue", na.value="white") + - ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + + ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + xlab("J genes") + ylab("V Genes") @@ -156,8 +156,7 @@ dev.off() } -VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) -#VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) +VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) @@ -174,7 +173,7 @@ geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=log)) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + scale_fill_gradient(low="gold", high="blue", na.value="white") + - ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + + ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + xlab("J genes") + ylab("D Genes") @@ -183,8 +182,7 @@ dev.off() } -DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) -#DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) +DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE)