comparison mutation_analysis.r @ 7:cb7c65e3e43f draft

Uploaded
author davidvanzessen
date Wed, 11 Mar 2015 08:56:37 -0400
parents 069419cccba4
children d84c9791d8c4
comparison
equal deleted inserted replaced
6:3c2230868300 7:cb7c65e3e43f
100 #dat[dat[,col] == "",] = "0" 100 #dat[dat[,col] == "",] = "0"
101 dat[,col] = as.numeric(dat[,col]) 101 dat[,col] = as.numeric(dat[,col])
102 dat[is.na(dat[,col]),] = 0 102 dat[is.na(dat[,col]),] = 0
103 } 103 }
104 104
105 dat$VRegionMutations = dat$FR1.IMGT.Nb.of.mutations + 105 dat$VRegionMutations = dat$CDR1.IMGT.Nb.of.mutations +
106 dat$CDR1.IMGT.Nb.of.mutations +
107 dat$FR2.IMGT.Nb.of.mutations + 106 dat$FR2.IMGT.Nb.of.mutations +
108 dat$CDR2.IMGT.Nb.of.mutations + 107 dat$CDR2.IMGT.Nb.of.mutations +
109 dat$FR3.IMGT.Nb.of.mutations 108 dat$FR3.IMGT.Nb.of.mutations
110 109
111 dat$VRegionNucleotides = dat$FR1.IMGT.Nb.of.nucleotides + 110 dat$VRegionNucleotides = dat$CDR1.IMGT.Nb.of.nucleotides +
112 dat$CDR1.IMGT.Nb.of.nucleotides +
113 dat$FR2.IMGT.Nb.of.nucleotides + 111 dat$FR2.IMGT.Nb.of.nucleotides +
114 dat$CDR2.IMGT.Nb.of.nucleotides + 112 dat$CDR2.IMGT.Nb.of.nucleotides +
115 dat$FR3.IMGT.Nb.of.nucleotides 113 dat$FR3.IMGT.Nb.of.nucleotides
116 114
117 dat$transitionMutations = dat$FR1.IMGT.a.g + 115 dat$transitionMutations = dat$CDR1.IMGT.a.g +
118 dat$FR1.IMGT.g.a +
119 dat$FR1.IMGT.c.t +
120 dat$FR1.IMGT.t.c +
121 dat$CDR1.IMGT.a.g +
122 dat$CDR1.IMGT.g.a + 116 dat$CDR1.IMGT.g.a +
123 dat$CDR1.IMGT.c.t + 117 dat$CDR1.IMGT.c.t +
124 dat$CDR1.IMGT.t.c + 118 dat$CDR1.IMGT.t.c +
125 dat$FR2.IMGT.a.g + 119 dat$FR2.IMGT.a.g +
126 dat$FR2.IMGT.g.a + 120 dat$FR2.IMGT.g.a +
133 dat$FR3.IMGT.a.g + 127 dat$FR3.IMGT.a.g +
134 dat$FR3.IMGT.g.a + 128 dat$FR3.IMGT.g.a +
135 dat$FR3.IMGT.c.t + 129 dat$FR3.IMGT.c.t +
136 dat$FR3.IMGT.t.c 130 dat$FR3.IMGT.t.c
137 131
138 dat$transversionMutations = dat$FR1.IMGT.a.c + 132 dat$transversionMutations = dat$CDR1.IMGT.a.c +
139 dat$FR1.IMGT.c.a +
140 dat$FR1.IMGT.a.t +
141 dat$FR1.IMGT.t.a +
142 dat$FR1.IMGT.g.c +
143 dat$FR1.IMGT.c.g +
144 dat$FR1.IMGT.g.t +
145 dat$FR1.IMGT.t.g +
146 dat$CDR1.IMGT.a.c +
147 dat$CDR1.IMGT.c.a + 133 dat$CDR1.IMGT.c.a +
148 dat$CDR1.IMGT.a.t + 134 dat$CDR1.IMGT.a.t +
149 dat$CDR1.IMGT.t.a + 135 dat$CDR1.IMGT.t.a +
150 dat$CDR1.IMGT.g.c + 136 dat$CDR1.IMGT.g.c +
151 dat$CDR1.IMGT.c.g + 137 dat$CDR1.IMGT.c.g +
175 dat$FR3.IMGT.c.g + 161 dat$FR3.IMGT.c.g +
176 dat$FR3.IMGT.g.t + 162 dat$FR3.IMGT.g.t +
177 dat$FR3.IMGT.t.g 163 dat$FR3.IMGT.t.g
178 164
179 165
180 dat$transitionMutationsAtGC = dat$FR1.IMGT.g.a + 166 dat$transitionMutationsAtGC = dat$CDR1.IMGT.g.a +
181 dat$FR1.IMGT.c.t +
182 dat$CDR1.IMGT.g.a +
183 dat$CDR1.IMGT.c.t + 167 dat$CDR1.IMGT.c.t +
184 dat$FR2.IMGT.g.a + 168 dat$FR2.IMGT.g.a +
185 dat$FR2.IMGT.c.t + 169 dat$FR2.IMGT.c.t +
186 dat$CDR2.IMGT.g.a + 170 dat$CDR2.IMGT.g.a +
187 dat$CDR2.IMGT.c.t + 171 dat$CDR2.IMGT.c.t +
188 dat$FR3.IMGT.g.a + 172 dat$FR3.IMGT.g.a +
189 dat$FR3.IMGT.c.t 173 dat$FR3.IMGT.c.t
190 174
191 dat$totalMutationsAtGC = dat$FR1.IMGT.g.a + 175 dat$totalMutationsAtGC = dat$CDR1.IMGT.g.a +
192 dat$FR1.IMGT.c.t +
193 dat$FR1.IMGT.c.a +
194 dat$FR1.IMGT.g.c +
195 dat$FR1.IMGT.c.g +
196 dat$FR1.IMGT.g.t +
197 dat$CDR1.IMGT.g.a +
198 dat$CDR1.IMGT.c.t + 176 dat$CDR1.IMGT.c.t +
199 dat$CDR1.IMGT.c.a + 177 dat$CDR1.IMGT.c.a +
200 dat$CDR1.IMGT.g.c + 178 dat$CDR1.IMGT.g.c +
201 dat$CDR1.IMGT.c.g + 179 dat$CDR1.IMGT.c.g +
202 dat$CDR1.IMGT.g.t + 180 dat$CDR1.IMGT.g.t +
268 if(nt1 == nt2){ 246 if(nt1 == nt2){
269 next 247 next
270 } 248 }
271 NT1 = LETTERS[letters == nt1] 249 NT1 = LETTERS[letters == nt1]
272 NT2 = LETTERS[letters == nt2] 250 NT2 = LETTERS[letters == nt2]
273 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="") 251 FR1 = 0 #paste("FR1.IMGT.", nt1, ".", nt2, sep="")
274 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") 252 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="")
275 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") 253 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="")
276 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") 254 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="")
277 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") 255 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="")
278 transitionTable[NT1,NT2] = sum( tmp[,FR1] + 256 transitionTable[NT1,NT2] = sum( tmp[,CDR1] +
279 tmp[,CDR1] + 257 tmp[,FR2] +
280 tmp[,FR2] + 258 tmp[,CDR2] +
281 tmp[,CDR2] + 259 tmp[,FR3])
282 tmp[,FR3])
283 } 260 }
284 } 261 }
285 write.table(x=transitionTable, file=paste("transitions_", gene ,".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA) 262 write.table(x=transitionTable, file=paste("transitions_", gene ,".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA)
286 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", gene ,".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) 263 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", gene ,".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
287 264
330 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="") 307 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="")
331 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") 308 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="")
332 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") 309 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="")
333 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") 310 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="")
334 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") 311 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="")
335 transitionTable[NT1,NT2] = sum( tmp[,FR1] + 312 transitionTable[NT1,NT2] = sum( tmp[,CDR1] +
336 tmp[,CDR1] + 313 tmp[,FR2] +
337 tmp[,FR2] + 314 tmp[,CDR2] +
338 tmp[,CDR2] + 315 tmp[,FR3])
339 tmp[,FR3])
340 } 316 }
341 } 317 }
342 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) 318 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA)
343 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file="matched_all.txt", sep="\t",quote=F,row.names=F,col.names=T) 319 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file="matched_all.txt", sep="\t",quote=F,row.names=F,col.names=T)
344 cat(matrx[1,x], file="total_value.txt") 320 cat(matrx[1,x], file="total_value.txt")