Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.r @ 73:13c3710604ef draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 04 May 2016 08:40:31 -0400 |
parents | 51d92233fb5d |
children | b523ce95d857 |
comparison
equal
deleted
inserted
replaced
72:51d92233fb5d | 73:13c3710604ef |
---|---|
4 args <- commandArgs(trailingOnly = TRUE) | 4 args <- commandArgs(trailingOnly = TRUE) |
5 | 5 |
6 input = args[1] | 6 input = args[1] |
7 genes = unlist(strsplit(args[2], ",")) | 7 genes = unlist(strsplit(args[2], ",")) |
8 outputdir = args[3] | 8 outputdir = args[3] |
9 print(args[4]) | |
10 include_fr1 = ifelse(args[4] == "yes", T, F) | 9 include_fr1 = ifelse(args[4] == "yes", T, F) |
11 setwd(outputdir) | 10 setwd(outputdir) |
12 | 11 |
13 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F) | 12 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F) |
14 | 13 |
25 transitionTable["T","T"] = NA | 24 transitionTable["T","T"] = NA |
26 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) | 25 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) |
27 cat("0", file="n.txt") | 26 cat("0", file="n.txt") |
28 stop("No data") | 27 stop("No data") |
29 } | 28 } |
30 | |
31 | |
32 | 29 |
33 cleanup_columns = c("FR1.IMGT.c.a", | 30 cleanup_columns = c("FR1.IMGT.c.a", |
34 "FR2.IMGT.g.t", | 31 "FR2.IMGT.g.t", |
35 "CDR1.IMGT.Nb.of.nucleotides", | 32 "CDR1.IMGT.Nb.of.nucleotides", |
36 "CDR2.IMGT.t.a", | 33 "CDR2.IMGT.t.a", |
109 | 106 |
110 for(col in cleanup_columns){ | 107 for(col in cleanup_columns){ |
111 dat[,col] = gsub("\\(.*\\)", "", dat[,col]) | 108 dat[,col] = gsub("\\(.*\\)", "", dat[,col]) |
112 #dat[dat[,col] == "",] = "0" | 109 #dat[dat[,col] == "",] = "0" |
113 dat[,col] = as.numeric(dat[,col]) | 110 dat[,col] = as.numeric(dat[,col]) |
114 dat[is.na(dat[,col]),] = 0 | 111 dat[is.na(dat[,col]),col] = 0 |
115 } | 112 } |
116 | 113 |
117 regions = c("FR1", "CDR1", "FR2", "CDR2", "FR3") | 114 regions = c("FR1", "CDR1", "FR2", "CDR2", "FR3") |
118 if(!include_fr1){ | 115 if(!include_fr1){ |
119 regions = c("CDR1", "FR2", "CDR2", "FR3") | 116 regions = c("CDR1", "FR2", "CDR2", "FR3") |
168 mutation.sum.columns = c("Sequence.ID", "VRegionMutations", "VRegionNucleotides", "transitionMutations", "transversionMutations", "transitionMutationsAtGC", "transitionMutationsAtAT", "silentMutationsFR", "nonSilentMutationsFR", "silentMutationsCDR", "nonSilentMutationsCDR") | 165 mutation.sum.columns = c("Sequence.ID", "VRegionMutations", "VRegionNucleotides", "transitionMutations", "transversionMutations", "transitionMutationsAtGC", "transitionMutationsAtAT", "silentMutationsFR", "nonSilentMutationsFR", "silentMutationsCDR", "nonSilentMutationsCDR") |
169 | 166 |
170 write.table(dat[,mutation.sum.columns], "mutation_by_id.txt", sep="\t",quote=F,row.names=F,col.names=T) | 167 write.table(dat[,mutation.sum.columns], "mutation_by_id.txt", sep="\t",quote=F,row.names=F,col.names=T) |
171 | 168 |
172 setwd(outputdir) | 169 setwd(outputdir) |
173 | |
174 | 170 |
175 calculate_result = function(i, gene, dat, matrx, f, fname, name){ | 171 calculate_result = function(i, gene, dat, matrx, f, fname, name){ |
176 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] | 172 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] |
177 | 173 |
178 j = i - 1 | 174 j = i - 1 |
311 | 307 |
312 if (!("ggplot2" %in% rownames(installed.packages()))) { | 308 if (!("ggplot2" %in% rownames(installed.packages()))) { |
313 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") | 309 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") |
314 } | 310 } |
315 | 311 |
316 | |
317 genesForPlot = gsub("[0-9]", "", dat$best_match) | 312 genesForPlot = gsub("[0-9]", "", dat$best_match) |
318 genesForPlot = data.frame(table(genesForPlot)) | 313 genesForPlot = data.frame(table(genesForPlot)) |
319 colnames(genesForPlot) = c("Gene","Freq") | 314 colnames(genesForPlot) = c("Gene","Freq") |
320 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) | 315 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) |
321 write.table(genesForPlot, "all.txt", sep="\t",quote=F,row.names=F,col.names=T) | 316 write.table(genesForPlot, "all.txt", sep="\t",quote=F,row.names=F,col.names=T) |
327 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("Classes", "( n =", sum(genesForPlot$Freq), ")")) | 322 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("Classes", "( n =", sum(genesForPlot$Freq), ")")) |
328 | 323 |
329 png(filename="all.png") | 324 png(filename="all.png") |
330 pc | 325 pc |
331 dev.off() | 326 dev.off() |
332 | |
333 | 327 |
334 #blegh | 328 #blegh |
335 genesForPlot = dat[grepl("ca", dat$best_match),]$best_match | 329 genesForPlot = dat[grepl("ca", dat$best_match),]$best_match |
336 if(length(genesForPlot) > 0){ | 330 if(length(genesForPlot) > 0){ |
337 genesForPlot = data.frame(table(genesForPlot)) | 331 genesForPlot = data.frame(table(genesForPlot)) |