Mercurial > repos > davidvanzessen > plotting_merged
comparison RScript.r @ 32:548b3035bbd7 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Fri, 08 Nov 2013 05:29:29 -0500 |
| parents | 1c25c75d863d |
| children | 8d9be90792b5 |
comparison
equal
deleted
inserted
replaced
| 31:d26e8c208b5f | 32:548b3035bbd7 |
|---|---|
| 2 | 2 |
| 3 args <- commandArgs(trailingOnly = TRUE) | 3 args <- commandArgs(trailingOnly = TRUE) |
| 4 | 4 |
| 5 inFile = args[1] | 5 inFile = args[1] |
| 6 outFile = args[2] | 6 outFile = args[2] |
| 7 | 7 outDir = args[3] |
| 8 #install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") | 8 |
| 9 library (gridExtra) | 9 if (!require("gridExtra")) { |
| 10 #install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") | 10 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") |
| 11 } | |
| 12 library(gridExtra) | |
| 13 if (!require("ggplot2")) { | |
| 14 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") | |
| 15 } | |
| 11 require(ggplot2) | 16 require(ggplot2) |
| 12 #install.packages("plyr", repos="http://cran.xl-mirror.nl/") | 17 if (!require("plyr")) { |
| 18 install.packages("plyr", repos="http://cran.xl-mirror.nl/") | |
| 19 } | |
| 13 require(plyr) | 20 require(plyr) |
| 14 | 21 |
| 15 test = read.table(inFile, sep="\t", header=TRUE) | 22 test = read.table(inFile, sep="\t", header=TRUE) |
| 16 | 23 |
| 17 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) | 24 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) |
| 24 | 31 |
| 25 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ] | 32 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ] |
| 26 | 33 |
| 27 PRODF = PROD[ -1] | 34 PRODF = PROD[ -1] |
| 28 | 35 |
| 29 #unique(PRODF[duplicated(PRODF),]) | 36 #PRODF = unique(PRODF) |
| 30 #length(row.names(PRODF[duplicated(PRODF),])) | 37 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] |
| 31 | 38 |
| 32 #length(row.names(PRODF)) | 39 |
| 33 PRODF = unique(PRODF) | 40 uniqueCount = split(PRODF, f=PRODF[,"Sample"]) |
| 34 #length(row.names(PRODF)) | 41 |
| 42 for(i in 1:length(uniqueCount)) { | |
| 43 dat = data.frame(uniqueCount[i]) | |
| 44 sample = paste(unique(dat[,15])) | |
| 45 uniqueCount[sample] = length(dat[,1]) | |
| 46 } | |
| 35 | 47 |
| 36 PRODFV = ddply(PRODF, c("Sample", "Top.V.Gene"), function(x) summary(x$VDJCDR3)) | 48 PRODFV = ddply(PRODF, c("Sample", "Top.V.Gene"), function(x) summary(x$VDJCDR3)) |
| 37 PRODFV$Length = as.numeric(PRODFV$Length) | 49 PRODFV$Length = as.numeric(PRODFV$Length) |
| 38 Total = 0 | 50 Total = 0 |
| 39 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 51 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
| 71 tcJ = textConnection(J) | 83 tcJ = textConnection(J) |
| 72 Jchain = read.table(tcJ, sep="\t", header=TRUE) | 84 Jchain = read.table(tcJ, sep="\t", header=TRUE) |
| 73 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) | 85 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) |
| 74 close(tcJ) | 86 close(tcJ) |
| 75 | 87 |
| 88 setwd(outDir) | |
| 89 | |
| 76 pV = ggplot(PRODFV) | 90 pV = ggplot(PRODFV) |
| 77 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 91 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
| 78 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") | 92 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") |
| 79 #pV | 93 |
| 94 png("VPlot.png",width = 1280, height = 720) | |
| 95 pV | |
| 96 dev.off(); | |
| 80 | 97 |
| 81 pD = ggplot(PRODFD) | 98 pD = ggplot(PRODFD) |
| 82 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 99 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
| 83 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") | 100 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") |
| 84 #pD | 101 |
| 102 png("DPlot.png",width = 800, height = 600) | |
| 103 pD | |
| 104 dev.off(); | |
| 85 | 105 |
| 86 pJ = ggplot(PRODFJ) | 106 pJ = ggplot(PRODFJ) |
| 87 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 107 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
| 88 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") | 108 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") |
| 89 #pJ | 109 |
| 90 | 110 png("JPlot.png",width = 800, height = 600) |
| 91 #plotall = grid.arrange(pV, arrangeGrob(pD, pJ, ncol=2), ncol=1, widths=c(1,1.2)) | 111 pJ |
| 92 #plotall | 112 dev.off(); |
| 93 #ggsave(outFile, dpi=125) | 113 |
| 94 | 114 |
| 95 | 115 plotVD <- function(dat){ |
| 96 png(outFile,width = 1920, height = 1200) | 116 img = ggplot() + |
| 97 plotall = grid.arrange(pV, arrangeGrob(pD, pJ, ncol=2), ncol=1, widths=c(1,1.2)) | 117 geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + |
| 98 dev.off() | 118 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
| 99 | 119 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
| 100 | 120 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + |
| 121 xlab("D genes") + | |
| 122 ylab("V Genes") | |
| 123 | |
| 124 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) | |
| 125 print(img) | |
| 126 dev.off() | |
| 127 } | |
| 128 | |
| 129 | |
| 130 VandDCount = ddply(PRODF, c("Top.V.Gene", "Top.D.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | |
| 131 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) | |
| 132 | |
| 133 completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) | |
| 134 completeVD$Length = as.numeric(completeVD$Length) | |
| 135 completeVD$log = log(completeVD$Length) | |
| 136 completeVD = merge(completeVD, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) | |
| 137 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | |
| 138 #completeVD$log[is.na(completeVD$log)] = 0 | |
| 139 l = split(completeVD, f=completeVD[,"Sample"]) | |
| 140 | |
| 141 lapply(l, FUN=plotVD) | |
| 142 | |
| 143 | |
| 144 | |
| 145 plotVJ <- function(dat){ | |
| 146 img = ggplot() + | |
| 147 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + | |
| 148 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
| 149 scale_fill_gradient(low="gold", high="blue", na.value="white") + | |
| 150 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + | |
| 151 xlab("J genes") + | |
| 152 ylab("V Genes") | |
| 153 | |
| 154 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) | |
| 155 print(img) | |
| 156 dev.off() | |
| 157 } | |
| 158 | |
| 159 VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | |
| 160 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) | |
| 161 | |
| 162 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) | |
| 163 completeVJ$Length = as.numeric(completeVJ$Length) | |
| 164 completeVJ$log = log(completeVJ$Length) | |
| 165 completeVJ = merge(completeVJ, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) | |
| 166 completeVJ = merge(completeVJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) | |
| 167 #completeVJ$log[is.na(completeVJ$log)] = 0 | |
| 168 l = split(completeVJ, f=completeVJ[,"Sample"]) | |
| 169 lapply(l, FUN=plotVJ) | |
| 170 | |
| 171 plotDJ <- function(dat){ | |
| 172 img = ggplot() + | |
| 173 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=log)) + | |
| 174 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
| 175 scale_fill_gradient(low="gold", high="blue", na.value="white") + | |
| 176 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + | |
| 177 xlab("J genes") + | |
| 178 ylab("D Genes") | |
| 179 | |
| 180 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) | |
| 181 print(img) | |
| 182 dev.off() | |
| 183 } | |
| 184 | |
| 185 DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | |
| 186 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) | |
| 187 | |
| 188 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) | |
| 189 completeDJ$Length = as.numeric(completeDJ$Length) | |
| 190 completeDJ$log = log(completeDJ$Length) | |
| 191 completeDJ = merge(completeDJ, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | |
| 192 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) | |
| 193 #completeDJ$log[is.na(completeDJ$log)] = 0 | |
| 194 l = split(completeDJ, f=completeDJ[,"Sample"]) | |
| 195 lapply(l, FUN=plotDJ) | |
| 196 | |
| 197 | |
| 198 sampleFile <- file("samples.txt") | |
| 199 un = unique(test$Sample) | |
| 200 un = paste(un, sep="\n") | |
| 201 writeLines(un, sampleFile) | |
| 202 close(sampleFile) |
