Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.r @ 7:cb7c65e3e43f draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 11 Mar 2015 08:56:37 -0400 |
parents | 069419cccba4 |
children | d84c9791d8c4 |
comparison
legend: equal | deleted | inserted | replaced
6:3c2230868300 | 7:cb7c65e3e43f |
---|---|
100 #dat[dat[,col] == "",] = "0" | 100 #dat[dat[,col] == "",] = "0" |
101 dat[,col] = as.numeric(dat[,col]) | 101 dat[,col] = as.numeric(dat[,col]) |
102 dat[is.na(dat[,col]),] = 0 | 102 dat[is.na(dat[,col]),] = 0 |
103 } | 103 } |
104 | 104 |
105 dat$VRegionMutations = dat$FR1.IMGT.Nb.of.mutations + | 105 dat$VRegionMutations = dat$CDR1.IMGT.Nb.of.mutations + |
106 dat$CDR1.IMGT.Nb.of.mutations + | |
107 dat$FR2.IMGT.Nb.of.mutations + | 106 dat$FR2.IMGT.Nb.of.mutations + |
108 dat$CDR2.IMGT.Nb.of.mutations + | 107 dat$CDR2.IMGT.Nb.of.mutations + |
109 dat$FR3.IMGT.Nb.of.mutations | 108 dat$FR3.IMGT.Nb.of.mutations |
110 | 109 |
111 dat$VRegionNucleotides = dat$FR1.IMGT.Nb.of.nucleotides + | 110 dat$VRegionNucleotides = dat$CDR1.IMGT.Nb.of.nucleotides + |
112 dat$CDR1.IMGT.Nb.of.nucleotides + | |
113 dat$FR2.IMGT.Nb.of.nucleotides + | 111 dat$FR2.IMGT.Nb.of.nucleotides + |
114 dat$CDR2.IMGT.Nb.of.nucleotides + | 112 dat$CDR2.IMGT.Nb.of.nucleotides + |
115 dat$FR3.IMGT.Nb.of.nucleotides | 113 dat$FR3.IMGT.Nb.of.nucleotides |
116 | 114 |
117 dat$transitionMutations = dat$FR1.IMGT.a.g + | 115 dat$transitionMutations = dat$CDR1.IMGT.a.g + |
118 dat$FR1.IMGT.g.a + | |
119 dat$FR1.IMGT.c.t + | |
120 dat$FR1.IMGT.t.c + | |
121 dat$CDR1.IMGT.a.g + | |
122 dat$CDR1.IMGT.g.a + | 116 dat$CDR1.IMGT.g.a + |
123 dat$CDR1.IMGT.c.t + | 117 dat$CDR1.IMGT.c.t + |
124 dat$CDR1.IMGT.t.c + | 118 dat$CDR1.IMGT.t.c + |
125 dat$FR2.IMGT.a.g + | 119 dat$FR2.IMGT.a.g + |
126 dat$FR2.IMGT.g.a + | 120 dat$FR2.IMGT.g.a + |
133 dat$FR3.IMGT.a.g + | 127 dat$FR3.IMGT.a.g + |
134 dat$FR3.IMGT.g.a + | 128 dat$FR3.IMGT.g.a + |
135 dat$FR3.IMGT.c.t + | 129 dat$FR3.IMGT.c.t + |
136 dat$FR3.IMGT.t.c | 130 dat$FR3.IMGT.t.c |
137 | 131 |
138 dat$transversionMutations = dat$FR1.IMGT.a.c + | 132 dat$transversionMutations = dat$CDR1.IMGT.a.c + |
139 dat$FR1.IMGT.c.a + | |
140 dat$FR1.IMGT.a.t + | |
141 dat$FR1.IMGT.t.a + | |
142 dat$FR1.IMGT.g.c + | |
143 dat$FR1.IMGT.c.g + | |
144 dat$FR1.IMGT.g.t + | |
145 dat$FR1.IMGT.t.g + | |
146 dat$CDR1.IMGT.a.c + | |
147 dat$CDR1.IMGT.c.a + | 133 dat$CDR1.IMGT.c.a + |
148 dat$CDR1.IMGT.a.t + | 134 dat$CDR1.IMGT.a.t + |
149 dat$CDR1.IMGT.t.a + | 135 dat$CDR1.IMGT.t.a + |
150 dat$CDR1.IMGT.g.c + | 136 dat$CDR1.IMGT.g.c + |
151 dat$CDR1.IMGT.c.g + | 137 dat$CDR1.IMGT.c.g + |
175 dat$FR3.IMGT.c.g + | 161 dat$FR3.IMGT.c.g + |
176 dat$FR3.IMGT.g.t + | 162 dat$FR3.IMGT.g.t + |
177 dat$FR3.IMGT.t.g | 163 dat$FR3.IMGT.t.g |
178 | 164 |
179 | 165 |
180 dat$transitionMutationsAtGC = dat$FR1.IMGT.g.a + | 166 dat$transitionMutationsAtGC = dat$CDR1.IMGT.g.a + |
181 dat$FR1.IMGT.c.t + | |
182 dat$CDR1.IMGT.g.a + | |
183 dat$CDR1.IMGT.c.t + | 167 dat$CDR1.IMGT.c.t + |
184 dat$FR2.IMGT.g.a + | 168 dat$FR2.IMGT.g.a + |
185 dat$FR2.IMGT.c.t + | 169 dat$FR2.IMGT.c.t + |
186 dat$CDR2.IMGT.g.a + | 170 dat$CDR2.IMGT.g.a + |
187 dat$CDR2.IMGT.c.t + | 171 dat$CDR2.IMGT.c.t + |
188 dat$FR3.IMGT.g.a + | 172 dat$FR3.IMGT.g.a + |
189 dat$FR3.IMGT.c.t | 173 dat$FR3.IMGT.c.t |
190 | 174 |
191 dat$totalMutationsAtGC = dat$FR1.IMGT.g.a + | 175 dat$totalMutationsAtGC = dat$CDR1.IMGT.g.a + |
192 dat$FR1.IMGT.c.t + | |
193 dat$FR1.IMGT.c.a + | |
194 dat$FR1.IMGT.g.c + | |
195 dat$FR1.IMGT.c.g + | |
196 dat$FR1.IMGT.g.t + | |
197 dat$CDR1.IMGT.g.a + | |
198 dat$CDR1.IMGT.c.t + | 176 dat$CDR1.IMGT.c.t + |
199 dat$CDR1.IMGT.c.a + | 177 dat$CDR1.IMGT.c.a + |
200 dat$CDR1.IMGT.g.c + | 178 dat$CDR1.IMGT.g.c + |
201 dat$CDR1.IMGT.c.g + | 179 dat$CDR1.IMGT.c.g + |
202 dat$CDR1.IMGT.g.t + | 180 dat$CDR1.IMGT.g.t + |
268 if(nt1 == nt2){ | 246 if(nt1 == nt2){ |
269 next | 247 next |
270 } | 248 } |
271 NT1 = LETTERS[letters == nt1] | 249 NT1 = LETTERS[letters == nt1] |
272 NT2 = LETTERS[letters == nt2] | 250 NT2 = LETTERS[letters == nt2] |
273 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="") | 251 FR1 = 0 #paste("FR1.IMGT.", nt1, ".", nt2, sep="") |
274 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") | 252 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") |
275 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") | 253 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") |
276 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") | 254 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") |
277 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") | 255 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") |
278 transitionTable[NT1,NT2] = sum( tmp[,FR1] + | 256 transitionTable[NT1,NT2] = sum( tmp[,CDR1] + |
279 tmp[,CDR1] + | 257 tmp[,FR2] + |
280 tmp[,FR2] + | 258 tmp[,CDR2] + |
281 tmp[,CDR2] + | 259 tmp[,FR3]) |
282 tmp[,FR3]) | |
283 } | 260 } |
284 } | 261 } |
285 write.table(x=transitionTable, file=paste("transitions_", gene ,".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA) | 262 write.table(x=transitionTable, file=paste("transitions_", gene ,".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA) |
286 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", gene ,".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | 263 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", gene ,".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) |
287 | 264 |
330 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="") | 307 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="") |
331 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") | 308 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") |
332 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") | 309 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") |
333 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") | 310 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") |
334 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") | 311 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") |
335 transitionTable[NT1,NT2] = sum( tmp[,FR1] + | 312 transitionTable[NT1,NT2] = sum( tmp[,CDR1] + |
336 tmp[,CDR1] + | 313 tmp[,FR2] + |
337 tmp[,FR2] + | 314 tmp[,CDR2] + |
338 tmp[,CDR2] + | 315 tmp[,FR3]) |
339 tmp[,FR3]) | |
340 } | 316 } |
341 } | 317 } |
342 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) | 318 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) |
343 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file="matched_all.txt", sep="\t",quote=F,row.names=F,col.names=T) | 319 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file="matched_all.txt", sep="\t",quote=F,row.names=F,col.names=T) |
344 cat(matrx[1,x], file="total_value.txt") | 320 cat(matrx[1,x], file="total_value.txt") |