Mercurial > repos > davidvanzessen > plotting_merged
changeset 38:6e490e056fc4 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 14 Nov 2013 06:57:35 -0500 |
parents | 16fe6233f90e |
children | a9ad03d52680 |
files | RScript.r |
diffstat | 1 files changed, 38 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/RScript.r Fri Nov 08 06:54:21 2013 -0500 +++ b/RScript.r Thu Nov 14 06:57:35 2013 -0500 @@ -25,7 +25,7 @@ library(data.table) -test = read.table(inFile, sep="\t", header=TRUE) +test = read.table(inFile, sep="\t", header=TRUE, fill=T) test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) test$Top.D.Gene = gsub("[*]([0-9]+)", "", test$Top.D.Gene) @@ -110,13 +110,17 @@ pJ dev.off(); +revVchain = Vchain +revDchain = Dchain +revVchain$chr.orderV = rev(revVchain$chr.orderV) +revDchain$chr.orderD = rev(revDchain$chr.orderD) plotVD <- function(dat){ img = ggplot() + - geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + + geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=relLength)) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + scale_fill_gradient(low="gold", high="blue", na.value="white") + - ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + + ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + xlab("D genes") + ylab("V Genes") @@ -126,26 +130,29 @@ } VandDCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.D.Gene", "Sample")]) + +VandDCount$l = log(VandDCount$Length) +maxVD = data.frame(data.table(VandDCount)[, list(max=max(l)), by=c("Sample")]) +VandDCount = merge(VandDCount, maxVD, by.x="Sample", by.y="Sample", all.x=T) +VandDCount$relLength = VandDCount$l / VandDCount$max + cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) -completeVD$Length = as.numeric(completeVD$Length) -completeVD$log = log(completeVD$Length) -completeVD = merge(completeVD, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) +completeVD = merge(completeVD, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) -#completeVD$log[is.na(completeVD$log)] = 0 -l = split(completeVD, f=completeVD[,"Sample"]) +VDList = split(completeVD, f=completeVD[,"Sample"]) -lapply(l, FUN=plotVD) +lapply(VDList, FUN=plotVD) plotVJ <- function(dat){ img = ggplot() + - geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + + geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=relLength)) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + scale_fill_gradient(low="gold", high="blue", na.value="white") + - ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + + ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + xlab("J genes") + ylab("V Genes") @@ -155,24 +162,26 @@ } VandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.V.Gene", "Top.J.Gene", "Sample")]) -#VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) + +VandJCount$l = log(VandJCount$Length) +maxVJ = data.frame(data.table(VandJCount)[, list(max=max(l)), by=c("Sample")]) +VandJCount = merge(VandJCount, maxVJ, by.x="Sample", by.y="Sample", all.x=T) +VandJCount$relLength = VandJCount$l / VandJCount$max + cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) -completeVJ$Length = as.numeric(completeVJ$Length) -completeVJ$log = log(completeVJ$Length) -completeVJ = merge(completeVJ, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) +completeVJ = merge(completeVJ, revVchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) completeVJ = merge(completeVJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) -#completeVJ$log[is.na(completeVJ$log)] = 0 -l = split(completeVJ, f=completeVJ[,"Sample"]) -lapply(l, FUN=plotVJ) +VJList = split(completeVJ, f=completeVJ[,"Sample"]) +lapply(VJList, FUN=plotVJ) plotDJ <- function(dat){ img = ggplot() + - geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=log)) + + geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=relLength)) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + scale_fill_gradient(low="gold", high="blue", na.value="white") + - ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat[!is.na(dat[,4]),][4]) ,")", sep="")) + + ggtitle(paste(unique(dat$Sample), " (N=" , sum(dat$Length, na.rm=T) ,")", sep="")) + xlab("J genes") + ylab("D Genes") @@ -182,17 +191,19 @@ } DandJCount = data.frame(data.table(PRODF)[, list(Length=.N), by=c("Top.D.Gene", "Top.J.Gene", "Sample")]) -#DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) + +DandJCount$l = log(DandJCount$Length) +maxDJ = data.frame(data.table(DandJCount)[, list(max=max(l)), by=c("Sample")]) +DandJCount = merge(DandJCount, maxDJ, by.x="Sample", by.y="Sample", all.x=T) +DandJCount$relLength = DandJCount$l / DandJCount$max + cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) -completeDJ$Length = as.numeric(completeDJ$Length) -completeDJ$log = log(completeDJ$Length) -completeDJ = merge(completeDJ, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) +completeDJ = merge(completeDJ, revDchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) -#completeDJ$log[is.na(completeDJ$log)] = 0 -l = split(completeDJ, f=completeDJ[,"Sample"]) -lapply(l, FUN=plotDJ) +DJList = split(completeDJ, f=completeDJ[,"Sample"]) +lapply(DJList, FUN=plotDJ) sampleFile <- file("samples.txt")