comparison mutation_analysis.r @ 73:13c3710604ef draft

Uploaded
author davidvanzessen
date Wed, 04 May 2016 08:40:31 -0400
parents 51d92233fb5d
children b523ce95d857
comparison
equal deleted inserted replaced
72:51d92233fb5d 73:13c3710604ef
4 args <- commandArgs(trailingOnly = TRUE) 4 args <- commandArgs(trailingOnly = TRUE)
5 5
6 input = args[1] 6 input = args[1]
7 genes = unlist(strsplit(args[2], ",")) 7 genes = unlist(strsplit(args[2], ","))
8 outputdir = args[3] 8 outputdir = args[3]
9 print(args[4])
10 include_fr1 = ifelse(args[4] == "yes", T, F) 9 include_fr1 = ifelse(args[4] == "yes", T, F)
11 setwd(outputdir) 10 setwd(outputdir)
12 11
13 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F) 12 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F)
14 13
25 transitionTable["T","T"] = NA 24 transitionTable["T","T"] = NA
26 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) 25 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA)
27 cat("0", file="n.txt") 26 cat("0", file="n.txt")
28 stop("No data") 27 stop("No data")
29 } 28 }
30
31
32 29
33 cleanup_columns = c("FR1.IMGT.c.a", 30 cleanup_columns = c("FR1.IMGT.c.a",
34 "FR2.IMGT.g.t", 31 "FR2.IMGT.g.t",
35 "CDR1.IMGT.Nb.of.nucleotides", 32 "CDR1.IMGT.Nb.of.nucleotides",
36 "CDR2.IMGT.t.a", 33 "CDR2.IMGT.t.a",
109 106
110 for(col in cleanup_columns){ 107 for(col in cleanup_columns){
111 dat[,col] = gsub("\\(.*\\)", "", dat[,col]) 108 dat[,col] = gsub("\\(.*\\)", "", dat[,col])
112 #dat[dat[,col] == "",] = "0" 109 #dat[dat[,col] == "",] = "0"
113 dat[,col] = as.numeric(dat[,col]) 110 dat[,col] = as.numeric(dat[,col])
114 dat[is.na(dat[,col]),] = 0 111 dat[is.na(dat[,col]),col] = 0
115 } 112 }
116 113
117 regions = c("FR1", "CDR1", "FR2", "CDR2", "FR3") 114 regions = c("FR1", "CDR1", "FR2", "CDR2", "FR3")
118 if(!include_fr1){ 115 if(!include_fr1){
119 regions = c("CDR1", "FR2", "CDR2", "FR3") 116 regions = c("CDR1", "FR2", "CDR2", "FR3")
168 mutation.sum.columns = c("Sequence.ID", "VRegionMutations", "VRegionNucleotides", "transitionMutations", "transversionMutations", "transitionMutationsAtGC", "transitionMutationsAtAT", "silentMutationsFR", "nonSilentMutationsFR", "silentMutationsCDR", "nonSilentMutationsCDR") 165 mutation.sum.columns = c("Sequence.ID", "VRegionMutations", "VRegionNucleotides", "transitionMutations", "transversionMutations", "transitionMutationsAtGC", "transitionMutationsAtAT", "silentMutationsFR", "nonSilentMutationsFR", "silentMutationsCDR", "nonSilentMutationsCDR")
169 166
170 write.table(dat[,mutation.sum.columns], "mutation_by_id.txt", sep="\t",quote=F,row.names=F,col.names=T) 167 write.table(dat[,mutation.sum.columns], "mutation_by_id.txt", sep="\t",quote=F,row.names=F,col.names=T)
171 168
172 setwd(outputdir) 169 setwd(outputdir)
173
174 170
175 calculate_result = function(i, gene, dat, matrx, f, fname, name){ 171 calculate_result = function(i, gene, dat, matrx, f, fname, name){
176 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] 172 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),]
177 173
178 j = i - 1 174 j = i - 1
311 307
312 if (!("ggplot2" %in% rownames(installed.packages()))) { 308 if (!("ggplot2" %in% rownames(installed.packages()))) {
313 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") 309 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/")
314 } 310 }
315 311
316
317 genesForPlot = gsub("[0-9]", "", dat$best_match) 312 genesForPlot = gsub("[0-9]", "", dat$best_match)
318 genesForPlot = data.frame(table(genesForPlot)) 313 genesForPlot = data.frame(table(genesForPlot))
319 colnames(genesForPlot) = c("Gene","Freq") 314 colnames(genesForPlot) = c("Gene","Freq")
320 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) 315 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
321 write.table(genesForPlot, "all.txt", sep="\t",quote=F,row.names=F,col.names=T) 316 write.table(genesForPlot, "all.txt", sep="\t",quote=F,row.names=F,col.names=T)
327 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("Classes", "( n =", sum(genesForPlot$Freq), ")")) 322 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("Classes", "( n =", sum(genesForPlot$Freq), ")"))
328 323
329 png(filename="all.png") 324 png(filename="all.png")
330 pc 325 pc
331 dev.off() 326 dev.off()
332
333 327
334 #blegh 328 #blegh
335 genesForPlot = dat[grepl("ca", dat$best_match),]$best_match 329 genesForPlot = dat[grepl("ca", dat$best_match),]$best_match
336 if(length(genesForPlot) > 0){ 330 if(length(genesForPlot) > 0){
337 genesForPlot = data.frame(table(genesForPlot)) 331 genesForPlot = data.frame(table(genesForPlot))