comparison mutation_analysis.r @ 3:a0b27058dcac draft

Uploaded
author davidvanzessen
date Wed, 17 Sep 2014 07:25:17 -0400
parents 2f4298673519
children 069419cccba4
comparison
equal deleted inserted replaced
2:2f4298673519 3:a0b27058dcac
7 7
8 #dat = read.table("NWK276_MID6_25NT/8_V-REGION-nt-mutation-statistics_NWK276_MID6_25NT_051113.txt", header=T, sep="\t", fill=T, stringsAsFactors=F) 8 #dat = read.table("NWK276_MID6_25NT/8_V-REGION-nt-mutation-statistics_NWK276_MID6_25NT_051113.txt", header=T, sep="\t", fill=T, stringsAsFactors=F)
9 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F) 9 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F)
10 10
11 datSum = read.table(summaryinput, header=T, sep="\t", fill=T, stringsAsFactors=F) 11 datSum = read.table(summaryinput, header=T, sep="\t", fill=T, stringsAsFactors=F)
12 datSum = datSum[,c("Sequence.ID", "AA.JUNCTION")] 12 datSum = datSum[,c("Sequence.ID","J.GENE.and.allele", "AA.JUNCTION")]
13 13
14 dat = merge(dat, datSum, by="Sequence.ID", all.x=T) 14 dat = merge(dat, datSum, by="Sequence.ID", all.x=T)
15
16 #dat = dat[dat$Functionality == "productive",]
17
18 dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele)
19 dat$VGene = gsub("[*].*", "", dat$VGene)
20
21 dat$past = paste(dat$AA.JUNCTION, dat$VGene)
22
23 #dat = dat[!duplicated(dat$past), ]
24 15
25 if(length(dat$Sequence.ID) == 0){ 16 if(length(dat$Sequence.ID) == 0){
26 setwd(outputdir) 17 setwd(outputdir)
27 result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5)) 18 result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5))
28 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)") 19 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)")
118 #dat[dat[,col] == "",] = "0" 109 #dat[dat[,col] == "",] = "0"
119 dat[,col] = as.numeric(dat[,col]) 110 dat[,col] = as.numeric(dat[,col])
120 dat[is.na(dat[,col]),] = 0 111 dat[is.na(dat[,col]),] = 0
121 } 112 }
122 113
114 dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele)
115 dat$VGene = gsub("[*].*", "", dat$VGene)
116 dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele)
117 dat$JGene = gsub("[*].*", "", dat$JGene)
118
119 dat$past = paste(dat$AA.JUNCTION, dat$VGene, dat$JGene, (dat$FR1.IMGT.Nb.of.mutations + dat$CDR1.IMGT.Nb.of.mutations + dat$FR2.IMGT.Nb.of.mutations + dat$CDR2.IMGT.Nb.of.mutations + dat$FR3.IMGT.Nb.of.mutations))
120
121 dat = dat[!duplicated(dat$past), ]
122
123 VRegionMutations = sum(dat$FR1.IMGT.Nb.of.mutations + 123 VRegionMutations = sum(dat$FR1.IMGT.Nb.of.mutations +
124 dat$CDR1.IMGT.Nb.of.mutations + 124 dat$CDR1.IMGT.Nb.of.mutations +
125 dat$FR2.IMGT.Nb.of.mutations + 125 dat$FR2.IMGT.Nb.of.mutations +
126 dat$CDR2.IMGT.Nb.of.mutations + 126 dat$CDR2.IMGT.Nb.of.mutations +
127 dat$FR3.IMGT.Nb.of.mutations) 127 dat$FR3.IMGT.Nb.of.mutations)