Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.r @ 23:28b8d980db22 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 07 Apr 2015 04:46:35 -0400 |
parents | d84c9791d8c4 |
children | 31eee1b3d7df |
comparison
equal
deleted
inserted
replaced
22:d84c9791d8c4 | 23:28b8d980db22 |
---|---|
128 dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns) | 128 dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns) |
129 | 129 |
130 totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t"), sep="") | 130 totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t"), sep="") |
131 dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns) | 131 dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns) |
132 | 132 |
133 silentMutations_columns = paste(regions, ".IMGT.Nb.of.silent.mutations", sep="") | |
134 silentMutations_columns | |
135 dat[,silentMutations_columns] | |
136 dat$silentMutations = apply(dat, FUN=sum_by_row, 1, columns=silentMutations_columns) | |
137 | |
138 nonSilentMutations_columns = paste(regions, ".IMGT.Nb.of.nonsilent.mutations", sep="") | |
139 dat$nonSilentMutations = apply(dat, FUN=sum_by_row, 1, columns=nonSilentMutations_columns) | |
133 | 140 |
134 | 141 |
135 setwd(outputdir) | 142 setwd(outputdir) |
136 | 143 |
137 nts = c("a", "t", "g", "c") | 144 nts = c("a", "t", "g", "c") |
138 zeros=rep(0, 4) | 145 zeros=rep(0, 4) |
139 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=5) | 146 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=6) |
140 for(i in 1:length(genes)){ | 147 for(i in 1:length(genes)){ |
141 gene = genes[i] | 148 gene = genes[i] |
142 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] | 149 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] |
143 if(gene == "."){ | 150 if(gene == "."){ |
144 tmp = dat | 151 tmp = dat |
160 matrx[4,y] = sum(tmp$totalMutationsAtGC) | 167 matrx[4,y] = sum(tmp$totalMutationsAtGC) |
161 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) | 168 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) |
162 matrx[5,x] = sum(tmp$totalMutationsAtGC) | 169 matrx[5,x] = sum(tmp$totalMutationsAtGC) |
163 matrx[5,y] = sum(tmp$VRegionMutations) | 170 matrx[5,y] = sum(tmp$VRegionMutations) |
164 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) | 171 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) |
172 matrx[6,x] = sum(tmp$silentMutations) | |
173 matrx[6,y] = sum(tmp$nonSilentMutations) | |
174 matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1) | |
175 | |
165 | 176 |
166 transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros) | 177 transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros) |
167 row.names(transitionTable) = c("A", "C", "G", "T") | 178 row.names(transitionTable) = c("A", "C", "G", "T") |
168 transitionTable["A","A"] = NA | 179 transitionTable["A","A"] = NA |
169 transitionTable["C","C"] = NA | 180 transitionTable["C","C"] = NA |
219 matrx[4,y] = sum(tmp$totalMutationsAtGC) | 230 matrx[4,y] = sum(tmp$totalMutationsAtGC) |
220 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) | 231 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) |
221 matrx[5,x] = sum(tmp$totalMutationsAtGC) | 232 matrx[5,x] = sum(tmp$totalMutationsAtGC) |
222 matrx[5,y] = sum(tmp$VRegionMutations) | 233 matrx[5,y] = sum(tmp$VRegionMutations) |
223 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) | 234 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) |
235 matrx[6,x] = sum(tmp$silentMutations) | |
236 matrx[6,y] = sum(tmp$nonSilentMutations) | |
237 matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1) | |
224 | 238 |
225 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) | 239 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) |
226 row.names(transitionTable) = c("A", "C", "G", "T") | 240 row.names(transitionTable) = c("A", "C", "G", "T") |
227 transitionTable["A","A"] = NA | 241 transitionTable["A","A"] = NA |
228 transitionTable["C","C"] = NA | 242 transitionTable["C","C"] = NA |
255 cat(length(tmp$Sequence.ID), file="total_n.txt") | 269 cat(length(tmp$Sequence.ID), file="total_n.txt") |
256 | 270 |
257 | 271 |
258 | 272 |
259 result = data.frame(matrx) | 273 result = data.frame(matrx) |
260 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C.G (%)") | 274 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C.G (%)", "Silent/Non Silent (%)") |
261 | 275 |
262 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) | 276 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) |
263 | 277 |
264 | 278 |
265 if (!("ggplot2" %in% rownames(installed.packages()))) { | 279 if (!("ggplot2" %in% rownames(installed.packages()))) { |
318 } | 332 } |
319 | 333 |
320 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2) | 334 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2) |
321 | 335 |
322 p = ggplot(dat, aes(best_match, percentage_mutations))# + scale_y_log10(breaks=scales,labels=scales) | 336 p = ggplot(dat, aes(best_match, percentage_mutations))# + scale_y_log10(breaks=scales,labels=scales) |
323 p = p + geom_point(aes(colour=best_match), position="jitter") | 337 p = p + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) + geom_point(aes(colour=best_match), position="jitter") |
324 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") | 338 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") |
325 | 339 |
326 png(filename="scatter.png") | 340 png(filename="scatter.png") |
327 print(p) | 341 print(p) |
328 dev.off() | 342 dev.off() |