Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.r @ 23:28b8d980db22 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Tue, 07 Apr 2015 04:46:35 -0400 |
| parents | d84c9791d8c4 |
| children | 31eee1b3d7df |
comparison
equal
deleted
inserted
replaced
| 22:d84c9791d8c4 | 23:28b8d980db22 |
|---|---|
| 128 dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns) | 128 dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns) |
| 129 | 129 |
| 130 totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t"), sep="") | 130 totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t"), sep="") |
| 131 dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns) | 131 dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns) |
| 132 | 132 |
| 133 silentMutations_columns = paste(regions, ".IMGT.Nb.of.silent.mutations", sep="") | |
| 134 silentMutations_columns | |
| 135 dat[,silentMutations_columns] | |
| 136 dat$silentMutations = apply(dat, FUN=sum_by_row, 1, columns=silentMutations_columns) | |
| 137 | |
| 138 nonSilentMutations_columns = paste(regions, ".IMGT.Nb.of.nonsilent.mutations", sep="") | |
| 139 dat$nonSilentMutations = apply(dat, FUN=sum_by_row, 1, columns=nonSilentMutations_columns) | |
| 133 | 140 |
| 134 | 141 |
| 135 setwd(outputdir) | 142 setwd(outputdir) |
| 136 | 143 |
| 137 nts = c("a", "t", "g", "c") | 144 nts = c("a", "t", "g", "c") |
| 138 zeros=rep(0, 4) | 145 zeros=rep(0, 4) |
| 139 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=5) | 146 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=6) |
| 140 for(i in 1:length(genes)){ | 147 for(i in 1:length(genes)){ |
| 141 gene = genes[i] | 148 gene = genes[i] |
| 142 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] | 149 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] |
| 143 if(gene == "."){ | 150 if(gene == "."){ |
| 144 tmp = dat | 151 tmp = dat |
| 160 matrx[4,y] = sum(tmp$totalMutationsAtGC) | 167 matrx[4,y] = sum(tmp$totalMutationsAtGC) |
| 161 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) | 168 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) |
| 162 matrx[5,x] = sum(tmp$totalMutationsAtGC) | 169 matrx[5,x] = sum(tmp$totalMutationsAtGC) |
| 163 matrx[5,y] = sum(tmp$VRegionMutations) | 170 matrx[5,y] = sum(tmp$VRegionMutations) |
| 164 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) | 171 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) |
| 172 matrx[6,x] = sum(tmp$silentMutations) | |
| 173 matrx[6,y] = sum(tmp$nonSilentMutations) | |
| 174 matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1) | |
| 175 | |
| 165 | 176 |
| 166 transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros) | 177 transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros) |
| 167 row.names(transitionTable) = c("A", "C", "G", "T") | 178 row.names(transitionTable) = c("A", "C", "G", "T") |
| 168 transitionTable["A","A"] = NA | 179 transitionTable["A","A"] = NA |
| 169 transitionTable["C","C"] = NA | 180 transitionTable["C","C"] = NA |
| 219 matrx[4,y] = sum(tmp$totalMutationsAtGC) | 230 matrx[4,y] = sum(tmp$totalMutationsAtGC) |
| 220 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) | 231 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) |
| 221 matrx[5,x] = sum(tmp$totalMutationsAtGC) | 232 matrx[5,x] = sum(tmp$totalMutationsAtGC) |
| 222 matrx[5,y] = sum(tmp$VRegionMutations) | 233 matrx[5,y] = sum(tmp$VRegionMutations) |
| 223 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) | 234 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) |
| 235 matrx[6,x] = sum(tmp$silentMutations) | |
| 236 matrx[6,y] = sum(tmp$nonSilentMutations) | |
| 237 matrx[6,z] = round(matrx[6,x] / matrx[6,y] * 100, digits=1) | |
| 224 | 238 |
| 225 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) | 239 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) |
| 226 row.names(transitionTable) = c("A", "C", "G", "T") | 240 row.names(transitionTable) = c("A", "C", "G", "T") |
| 227 transitionTable["A","A"] = NA | 241 transitionTable["A","A"] = NA |
| 228 transitionTable["C","C"] = NA | 242 transitionTable["C","C"] = NA |
| 255 cat(length(tmp$Sequence.ID), file="total_n.txt") | 269 cat(length(tmp$Sequence.ID), file="total_n.txt") |
| 256 | 270 |
| 257 | 271 |
| 258 | 272 |
| 259 result = data.frame(matrx) | 273 result = data.frame(matrx) |
| 260 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C.G (%)") | 274 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C.G (%)", "Silent/Non Silent (%)") |
| 261 | 275 |
| 262 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) | 276 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) |
| 263 | 277 |
| 264 | 278 |
| 265 if (!("ggplot2" %in% rownames(installed.packages()))) { | 279 if (!("ggplot2" %in% rownames(installed.packages()))) { |
| 318 } | 332 } |
| 319 | 333 |
| 320 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2) | 334 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2) |
| 321 | 335 |
| 322 p = ggplot(dat, aes(best_match, percentage_mutations))# + scale_y_log10(breaks=scales,labels=scales) | 336 p = ggplot(dat, aes(best_match, percentage_mutations))# + scale_y_log10(breaks=scales,labels=scales) |
| 323 p = p + geom_point(aes(colour=best_match), position="jitter") | 337 p = p + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) + geom_point(aes(colour=best_match), position="jitter") |
| 324 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") | 338 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") |
| 325 | 339 |
| 326 png(filename="scatter.png") | 340 png(filename="scatter.png") |
| 327 print(p) | 341 print(p) |
| 328 dev.off() | 342 dev.off() |
