comparison mutation_analysis.r @ 23:28b8d980db22 draft

Uploaded
author davidvanzessen
date Tue, 07 Apr 2015 04:46:35 -0400
parents d84c9791d8c4
children 31eee1b3d7df
comparison
equal deleted inserted replaced
22:d84c9791d8c4 23:28b8d980db22
128 dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns) 128 dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns)
129 129
130 totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t"), sep="") 130 totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t"), sep="")
131 dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns) 131 dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns)
132 132
133 silentMutations_columns = paste(regions, ".IMGT.Nb.of.silent.mutations", sep="")
134 silentMutations_columns
135 dat[,silentMutations_columns]
136 dat$silentMutations = apply(dat, FUN=sum_by_row, 1, columns=silentMutations_columns)
137
138 nonSilentMutations_columns = paste(regions, ".IMGT.Nb.of.nonsilent.mutations", sep="")
139 dat$nonSilentMutations = apply(dat, FUN=sum_by_row, 1, columns=nonSilentMutations_columns)
133 140
134 141
135 setwd(outputdir) 142 setwd(outputdir)
136 143
137 nts = c("a", "t", "g", "c") 144 nts = c("a", "t", "g", "c")
138 zeros=rep(0, 4) 145 zeros=rep(0, 4)
139 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=5) 146 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=6)
140 for(i in 1:length(genes)){ 147 for(i in 1:length(genes)){
141 gene = genes[i] 148 gene = genes[i]
142 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] 149 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),]
143 if(gene == "."){ 150 if(gene == "."){
144 tmp = dat 151 tmp = dat
160 matrx[4,y] = sum(tmp$totalMutationsAtGC) 167 matrx[4,y] = sum(tmp$totalMutationsAtGC)
161 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) 168 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1)
162 matrx[5,x] = sum(tmp$totalMutationsAtGC) 169 matrx[5,x] = sum(tmp$totalMutationsAtGC)
163 matrx[5,y] = sum(tmp$VRegionMutations) 170 matrx[5,y] = sum(tmp$VRegionMutations)
164 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) 171 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1)
172 matrx[6,x] = sum(tmp$silentMutations)
173 matrx[6,y] = sum(tmp$nonSilentMutations)
174 matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1)
175
165 176
166 transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros) 177 transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros)
167 row.names(transitionTable) = c("A", "C", "G", "T") 178 row.names(transitionTable) = c("A", "C", "G", "T")
168 transitionTable["A","A"] = NA 179 transitionTable["A","A"] = NA
169 transitionTable["C","C"] = NA 180 transitionTable["C","C"] = NA
219 matrx[4,y] = sum(tmp$totalMutationsAtGC) 230 matrx[4,y] = sum(tmp$totalMutationsAtGC)
220 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) 231 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1)
221 matrx[5,x] = sum(tmp$totalMutationsAtGC) 232 matrx[5,x] = sum(tmp$totalMutationsAtGC)
222 matrx[5,y] = sum(tmp$VRegionMutations) 233 matrx[5,y] = sum(tmp$VRegionMutations)
223 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) 234 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1)
235 matrx[6,x] = sum(tmp$silentMutations)
236 matrx[6,y] = sum(tmp$nonSilentMutations)
237 matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1)
224 238
225 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) 239 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4)
226 row.names(transitionTable) = c("A", "C", "G", "T") 240 row.names(transitionTable) = c("A", "C", "G", "T")
227 transitionTable["A","A"] = NA 241 transitionTable["A","A"] = NA
228 transitionTable["C","C"] = NA 242 transitionTable["C","C"] = NA
255 cat(length(tmp$Sequence.ID), file="total_n.txt") 269 cat(length(tmp$Sequence.ID), file="total_n.txt")
256 270
257 271
258 272
259 result = data.frame(matrx) 273 result = data.frame(matrx)
260 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C.G (%)") 274 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C.G (%)", "Silent/Non Silent (%)")
261 275
262 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) 276 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F)
263 277
264 278
265 if (!("ggplot2" %in% rownames(installed.packages()))) { 279 if (!("ggplot2" %in% rownames(installed.packages()))) {
318 } 332 }
319 333
320 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2) 334 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2)
321 335
322 p = ggplot(dat, aes(best_match, percentage_mutations))# + scale_y_log10(breaks=scales,labels=scales) 336 p = ggplot(dat, aes(best_match, percentage_mutations))# + scale_y_log10(breaks=scales,labels=scales)
323 p = p + geom_point(aes(colour=best_match), position="jitter") 337 p = p + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) + geom_point(aes(colour=best_match), position="jitter")
324 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") 338 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot")
325 339
326 png(filename="scatter.png") 340 png(filename="scatter.png")
327 print(p) 341 print(p)
328 dev.off() 342 dev.off()