Mercurial > repos > davidvanzessen > plotting_merged
comparison RScript.r @ 32:548b3035bbd7 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 08 Nov 2013 05:29:29 -0500 |
parents | 1c25c75d863d |
children | 8d9be90792b5 |
comparison
equal
deleted
inserted
replaced
31:d26e8c208b5f | 32:548b3035bbd7 |
---|---|
2 | 2 |
3 args <- commandArgs(trailingOnly = TRUE) | 3 args <- commandArgs(trailingOnly = TRUE) |
4 | 4 |
5 inFile = args[1] | 5 inFile = args[1] |
6 outFile = args[2] | 6 outFile = args[2] |
7 | 7 outDir = args[3] |
8 #install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") | 8 |
9 library (gridExtra) | 9 if (!require("gridExtra")) { |
10 #install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") | 10 install.packages("gridExtra", repos="http://cran.xl-mirror.nl/") |
11 } | |
12 library(gridExtra) | |
13 if (!require("ggplot2")) { | |
14 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") | |
15 } | |
11 require(ggplot2) | 16 require(ggplot2) |
12 #install.packages("plyr", repos="http://cran.xl-mirror.nl/") | 17 if (!require("plyr")) { |
18 install.packages("plyr", repos="http://cran.xl-mirror.nl/") | |
19 } | |
13 require(plyr) | 20 require(plyr) |
14 | 21 |
15 test = read.table(inFile, sep="\t", header=TRUE) | 22 test = read.table(inFile, sep="\t", header=TRUE) |
16 | 23 |
17 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) | 24 test$Top.V.Gene = gsub("[*]([0-9]+)", "", test$Top.V.Gene) |
24 | 31 |
25 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ] | 32 NONPROD = test[test$VDJ.Frame == "In-frame with stop codon" | test$VDJ.Frame == "Out-of-frame" | test$CDR3.Found.How == "NOT_FOUND" , ] |
26 | 33 |
27 PRODF = PROD[ -1] | 34 PRODF = PROD[ -1] |
28 | 35 |
29 #unique(PRODF[duplicated(PRODF),]) | 36 #PRODF = unique(PRODF) |
30 #length(row.names(PRODF[duplicated(PRODF),])) | 37 PRODF = PRODF[!duplicated(PRODF$VDJCDR3), ] |
31 | 38 |
32 #length(row.names(PRODF)) | 39 |
33 PRODF = unique(PRODF) | 40 uniqueCount = split(PRODF, f=PRODF[,"Sample"]) |
34 #length(row.names(PRODF)) | 41 |
42 for(i in 1:length(uniqueCount)) { | |
43 dat = data.frame(uniqueCount[i]) | |
44 sample = paste(unique(dat[,15])) | |
45 uniqueCount[sample] = length(dat[,1]) | |
46 } | |
35 | 47 |
36 PRODFV = ddply(PRODF, c("Sample", "Top.V.Gene"), function(x) summary(x$VDJCDR3)) | 48 PRODFV = ddply(PRODF, c("Sample", "Top.V.Gene"), function(x) summary(x$VDJCDR3)) |
37 PRODFV$Length = as.numeric(PRODFV$Length) | 49 PRODFV$Length = as.numeric(PRODFV$Length) |
38 Total = 0 | 50 Total = 0 |
39 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 51 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
71 tcJ = textConnection(J) | 83 tcJ = textConnection(J) |
72 Jchain = read.table(tcJ, sep="\t", header=TRUE) | 84 Jchain = read.table(tcJ, sep="\t", header=TRUE) |
73 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) | 85 PRODFJ = merge(PRODFJ, Jchain, by.x='Top.J.Gene', by.y='v.name', all.x=TRUE) |
74 close(tcJ) | 86 close(tcJ) |
75 | 87 |
88 setwd(outDir) | |
89 | |
76 pV = ggplot(PRODFV) | 90 pV = ggplot(PRODFV) |
77 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 91 pV = pV + geom_bar( aes( x=factor(reorder(Top.V.Gene, chr.orderV)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
78 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") | 92 pV = pV + xlab("Summary of V gene") + ylab("Frequency") + ggtitle("Relative frequency of V gene usage") |
79 #pV | 93 |
94 png("VPlot.png",width = 1280, height = 720) | |
95 pV | |
96 dev.off(); | |
80 | 97 |
81 pD = ggplot(PRODFD) | 98 pD = ggplot(PRODFD) |
82 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 99 pD = pD + geom_bar( aes( x=factor(reorder(Top.D.Gene, chr.orderD)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
83 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") | 100 pD = pD + xlab("Summary of D gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") |
84 #pD | 101 |
102 png("DPlot.png",width = 800, height = 600) | |
103 pD | |
104 dev.off(); | |
85 | 105 |
86 pJ = ggplot(PRODFJ) | 106 pJ = ggplot(PRODFJ) |
87 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) | 107 pJ = pJ + geom_bar( aes( x=factor(reorder(Top.J.Gene, chr.orderJ)), y=relFreq, fill=Sample), stat='identity', position="dodge") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) |
88 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of D gene usage") | 108 pJ = pJ + xlab("Summary of J gene") + ylab("Frequency") + ggtitle("Relative frequency of J gene usage") |
89 #pJ | 109 |
90 | 110 png("JPlot.png",width = 800, height = 600) |
91 #plotall = grid.arrange(pV, arrangeGrob(pD, pJ, ncol=2), ncol=1, widths=c(1,1.2)) | 111 pJ |
92 #plotall | 112 dev.off(); |
93 #ggsave(outFile, dpi=125) | 113 |
94 | 114 |
95 | 115 plotVD <- function(dat){ |
96 png(outFile,width = 1920, height = 1200) | 116 img = ggplot() + |
97 plotall = grid.arrange(pV, arrangeGrob(pD, pJ, ncol=2), ncol=1, widths=c(1,1.2)) | 117 geom_tile(data=dat, aes(x=factor(reorder(Top.D.Gene, chr.orderD)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + |
98 dev.off() | 118 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + |
99 | 119 scale_fill_gradient(low="gold", high="blue", na.value="white") + |
100 | 120 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + |
121 xlab("D genes") + | |
122 ylab("V Genes") | |
123 | |
124 png(paste("HeatmapVD_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Dchain$v.name)), height=100+(15*length(Vchain$v.name))) | |
125 print(img) | |
126 dev.off() | |
127 } | |
128 | |
129 | |
130 VandDCount = ddply(PRODF, c("Top.V.Gene", "Top.D.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | |
131 cartegianProductVD = expand.grid(Top.V.Gene = Vchain$v.name, Top.D.Gene = Dchain$v.name, Sample = unique(test$Sample)) | |
132 | |
133 completeVD = merge(VandDCount, cartegianProductVD, all.y=TRUE) | |
134 completeVD$Length = as.numeric(completeVD$Length) | |
135 completeVD$log = log(completeVD$Length) | |
136 completeVD = merge(completeVD, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) | |
137 completeVD = merge(completeVD, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | |
138 #completeVD$log[is.na(completeVD$log)] = 0 | |
139 l = split(completeVD, f=completeVD[,"Sample"]) | |
140 | |
141 lapply(l, FUN=plotVD) | |
142 | |
143 | |
144 | |
145 plotVJ <- function(dat){ | |
146 img = ggplot() + | |
147 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.V.Gene, chr.orderV)), fill=log)) + | |
148 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
149 scale_fill_gradient(low="gold", high="blue", na.value="white") + | |
150 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + | |
151 xlab("J genes") + | |
152 ylab("V Genes") | |
153 | |
154 png(paste("HeatmapVJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Vchain$v.name))) | |
155 print(img) | |
156 dev.off() | |
157 } | |
158 | |
159 VandJCount = ddply(PRODF, c("Top.V.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | |
160 cartegianProductVJ = expand.grid(Top.V.Gene = Vchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) | |
161 | |
162 completeVJ = merge(VandJCount, cartegianProductVJ, all.y=TRUE) | |
163 completeVJ$Length = as.numeric(completeVJ$Length) | |
164 completeVJ$log = log(completeVJ$Length) | |
165 completeVJ = merge(completeVJ, Vchain, by.x="Top.V.Gene", by.y="v.name", all.x=TRUE) | |
166 completeVJ = merge(completeVJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) | |
167 #completeVJ$log[is.na(completeVJ$log)] = 0 | |
168 l = split(completeVJ, f=completeVJ[,"Sample"]) | |
169 lapply(l, FUN=plotVJ) | |
170 | |
171 plotDJ <- function(dat){ | |
172 img = ggplot() + | |
173 geom_tile(data=dat, aes(x=factor(reorder(Top.J.Gene, chr.orderJ)), y=factor(reorder(Top.D.Gene, chr.orderD)), fill=log)) + | |
174 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
175 scale_fill_gradient(low="gold", high="blue", na.value="white") + | |
176 ggtitle(paste(unique(dat$Sample), " (N=" , uniqueCount[paste(unique(dat$Sample))] ,")", sep="")) + | |
177 xlab("J genes") + | |
178 ylab("D Genes") | |
179 | |
180 png(paste("HeatmapDJ_", unique(dat[3])[1,1] , ".png", sep=""), width=150+(15*length(Jchain$v.name)), height=100+(15*length(Dchain$v.name))) | |
181 print(img) | |
182 dev.off() | |
183 } | |
184 | |
185 DandJCount = ddply(PRODF, c("Top.D.Gene", "Top.J.Gene", "Sample"), function(x) summary(x$VDJCDR3)) | |
186 cartegianProductDJ = expand.grid(Top.D.Gene = Dchain$v.name, Top.J.Gene = Jchain$v.name, Sample = unique(test$Sample)) | |
187 | |
188 completeDJ = merge(DandJCount, cartegianProductDJ, all.y=TRUE) | |
189 completeDJ$Length = as.numeric(completeDJ$Length) | |
190 completeDJ$log = log(completeDJ$Length) | |
191 completeDJ = merge(completeDJ, Dchain, by.x="Top.D.Gene", by.y="v.name", all.x=TRUE) | |
192 completeDJ = merge(completeDJ, Jchain, by.x="Top.J.Gene", by.y="v.name", all.x=TRUE) | |
193 #completeDJ$log[is.na(completeDJ$log)] = 0 | |
194 l = split(completeDJ, f=completeDJ[,"Sample"]) | |
195 lapply(l, FUN=plotDJ) | |
196 | |
197 | |
198 sampleFile <- file("samples.txt") | |
199 un = unique(test$Sample) | |
200 un = paste(un, sep="\n") | |
201 writeLines(un, sampleFile) | |
202 close(sampleFile) |