Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.r @ 4:069419cccba4 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Mon, 22 Sep 2014 10:19:36 -0400 |
| parents | a0b27058dcac |
| children | cb7c65e3e43f |
comparison
equal
deleted
inserted
replaced
| 3:a0b27058dcac | 4:069419cccba4 |
|---|---|
| 1 args <- commandArgs(trailingOnly = TRUE) | 1 args <- commandArgs(trailingOnly = TRUE) |
| 2 | 2 |
| 3 input = args[1] | 3 input = args[1] |
| 4 summaryinput = args[2] | 4 genes = unlist(strsplit(args[2], ",")) |
| 5 outputdir = args[3] | 5 outputdir = args[3] |
| 6 setwd(outputdir) | 6 setwd(outputdir) |
| 7 | 7 |
| 8 #dat = read.table("NWK276_MID6_25NT/8_V-REGION-nt-mutation-statistics_NWK276_MID6_25NT_051113.txt", header=T, sep="\t", fill=T, stringsAsFactors=F) | |
| 9 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F) | 8 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F) |
| 10 | 9 |
| 11 datSum = read.table(summaryinput, header=T, sep="\t", fill=T, stringsAsFactors=F) | |
| 12 datSum = datSum[,c("Sequence.ID","J.GENE.and.allele", "AA.JUNCTION")] | |
| 13 | |
| 14 dat = merge(dat, datSum, by="Sequence.ID", all.x=T) | |
| 15 | |
| 16 if(length(dat$Sequence.ID) == 0){ | 10 if(length(dat$Sequence.ID) == 0){ |
| 17 setwd(outputdir) | 11 setwd(outputdir) |
| 18 result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5)) | 12 result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5)) |
| 19 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)") | 13 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)") |
| 20 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) | 14 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) |
| 21 transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4)) | 15 transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4)) |
| 22 row.names(transitionTable) = c("A", "C", "G", "T") | 16 row.names(transitionTable) = c("A", "C", "G", "T") |
| 23 transitionTable["A","A"] = NA | 17 transitionTable["A","A"] = NA |
| 24 transitionTable["C","C"] = NA | 18 transitionTable["C","C"] = NA |
| 25 transitionTable["G","G"] = NA | 19 transitionTable["G","G"] = NA |
| 26 transitionTable["T","T"] = NA | 20 transitionTable["T","T"] = NA |
| 27 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) | 21 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) |
| 28 cat("0", file="n.txt") | 22 cat("0", file="n.txt") |
| 29 stop("No data") | 23 stop("No data") |
| 30 } | 24 } |
| 31 | 25 |
| 32 #print(paste("After filtering on 'productive' and deduplicating based on V and AA JUNCTION there are", length(dat$Sequence.ID), "sequences left")) | 26 |
| 33 | |
| 34 result = data.frame(x = 1:5, y = 1:5, z = 1:5) | |
| 35 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)") | |
| 36 | 27 |
| 37 cleanup_columns = c("FR1.IMGT.c.a", | 28 cleanup_columns = c("FR1.IMGT.c.a", |
| 38 "FR2.IMGT.g.t", | 29 "FR2.IMGT.g.t", |
| 39 "CDR1.IMGT.Nb.of.nucleotides", | 30 "CDR1.IMGT.Nb.of.nucleotides", |
| 40 "CDR2.IMGT.t.a", | 31 "CDR2.IMGT.t.a", |
| 109 #dat[dat[,col] == "",] = "0" | 100 #dat[dat[,col] == "",] = "0" |
| 110 dat[,col] = as.numeric(dat[,col]) | 101 dat[,col] = as.numeric(dat[,col]) |
| 111 dat[is.na(dat[,col]),] = 0 | 102 dat[is.na(dat[,col]),] = 0 |
| 112 } | 103 } |
| 113 | 104 |
| 114 dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele) | 105 dat$VRegionMutations = dat$FR1.IMGT.Nb.of.mutations + |
| 115 dat$VGene = gsub("[*].*", "", dat$VGene) | |
| 116 dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele) | |
| 117 dat$JGene = gsub("[*].*", "", dat$JGene) | |
| 118 | |
| 119 dat$past = paste(dat$AA.JUNCTION, dat$VGene, dat$JGene, (dat$FR1.IMGT.Nb.of.mutations + dat$CDR1.IMGT.Nb.of.mutations + dat$FR2.IMGT.Nb.of.mutations + dat$CDR2.IMGT.Nb.of.mutations + dat$FR3.IMGT.Nb.of.mutations)) | |
| 120 | |
| 121 dat = dat[!duplicated(dat$past), ] | |
| 122 | |
| 123 VRegionMutations = sum(dat$FR1.IMGT.Nb.of.mutations + | |
| 124 dat$CDR1.IMGT.Nb.of.mutations + | 106 dat$CDR1.IMGT.Nb.of.mutations + |
| 125 dat$FR2.IMGT.Nb.of.mutations + | 107 dat$FR2.IMGT.Nb.of.mutations + |
| 126 dat$CDR2.IMGT.Nb.of.mutations + | 108 dat$CDR2.IMGT.Nb.of.mutations + |
| 127 dat$FR3.IMGT.Nb.of.mutations) | 109 dat$FR3.IMGT.Nb.of.mutations |
| 128 | 110 |
| 129 VRegionNucleotides = sum( dat$FR1.IMGT.Nb.of.nucleotides + | 111 dat$VRegionNucleotides = dat$FR1.IMGT.Nb.of.nucleotides + |
| 130 dat$CDR1.IMGT.Nb.of.nucleotides + | 112 dat$CDR1.IMGT.Nb.of.nucleotides + |
| 131 dat$FR2.IMGT.Nb.of.nucleotides + | 113 dat$FR2.IMGT.Nb.of.nucleotides + |
| 132 dat$CDR2.IMGT.Nb.of.nucleotides + | 114 dat$CDR2.IMGT.Nb.of.nucleotides + |
| 133 dat$FR3.IMGT.Nb.of.nucleotides) | 115 dat$FR3.IMGT.Nb.of.nucleotides |
| 134 | 116 |
| 135 result[1,"x"] = VRegionMutations | 117 dat$transitionMutations = dat$FR1.IMGT.a.g + |
| 136 result[1,"y"] = VRegionNucleotides | |
| 137 result[1,"z"] = round(result[1,"x"] / result[1,"y"] * 100, digits=1) | |
| 138 | |
| 139 transitionMutations = sum(dat$FR1.IMGT.a.g + | |
| 140 dat$FR1.IMGT.g.a + | 118 dat$FR1.IMGT.g.a + |
| 141 dat$FR1.IMGT.c.t + | 119 dat$FR1.IMGT.c.t + |
| 142 dat$FR1.IMGT.t.c + | 120 dat$FR1.IMGT.t.c + |
| 143 dat$CDR1.IMGT.a.g + | 121 dat$CDR1.IMGT.a.g + |
| 144 dat$CDR1.IMGT.g.a + | 122 dat$CDR1.IMGT.g.a + |
| 153 dat$CDR2.IMGT.c.t + | 131 dat$CDR2.IMGT.c.t + |
| 154 dat$CDR2.IMGT.t.c + | 132 dat$CDR2.IMGT.t.c + |
| 155 dat$FR3.IMGT.a.g + | 133 dat$FR3.IMGT.a.g + |
| 156 dat$FR3.IMGT.g.a + | 134 dat$FR3.IMGT.g.a + |
| 157 dat$FR3.IMGT.c.t + | 135 dat$FR3.IMGT.c.t + |
| 158 dat$FR3.IMGT.t.c) | 136 dat$FR3.IMGT.t.c |
| 159 | 137 |
| 160 result[2,"x"] = transitionMutations | 138 dat$transversionMutations = dat$FR1.IMGT.a.c + |
| 161 result[2,"y"] = VRegionMutations | 139 dat$FR1.IMGT.c.a + |
| 162 result[2,"z"] = round(result[2,"x"] / result[2,"y"] * 100, digits=1) | 140 dat$FR1.IMGT.a.t + |
| 163 | 141 dat$FR1.IMGT.t.a + |
| 164 transversionMutations = sum( dat$FR1.IMGT.a.c + | 142 dat$FR1.IMGT.g.c + |
| 165 dat$FR1.IMGT.c.a + | 143 dat$FR1.IMGT.c.g + |
| 166 dat$FR1.IMGT.a.t + | 144 dat$FR1.IMGT.g.t + |
| 167 dat$FR1.IMGT.t.a + | 145 dat$FR1.IMGT.t.g + |
| 168 dat$FR1.IMGT.g.c + | 146 dat$CDR1.IMGT.a.c + |
| 169 dat$FR1.IMGT.c.g + | 147 dat$CDR1.IMGT.c.a + |
| 170 dat$FR1.IMGT.g.t + | 148 dat$CDR1.IMGT.a.t + |
| 171 dat$FR1.IMGT.t.g + | 149 dat$CDR1.IMGT.t.a + |
| 172 dat$CDR1.IMGT.a.c + | 150 dat$CDR1.IMGT.g.c + |
| 173 dat$CDR1.IMGT.c.a + | 151 dat$CDR1.IMGT.c.g + |
| 174 dat$CDR1.IMGT.a.t + | 152 dat$CDR1.IMGT.g.t + |
| 175 dat$CDR1.IMGT.t.a + | 153 dat$CDR1.IMGT.t.g + |
| 176 dat$CDR1.IMGT.g.c + | 154 dat$FR2.IMGT.a.c + |
| 177 dat$CDR1.IMGT.c.g + | 155 dat$FR2.IMGT.c.a + |
| 178 dat$CDR1.IMGT.g.t + | 156 dat$FR2.IMGT.a.t + |
| 179 dat$CDR1.IMGT.t.g + | 157 dat$FR2.IMGT.t.a + |
| 180 dat$FR2.IMGT.a.c + | 158 dat$FR2.IMGT.g.c + |
| 181 dat$FR2.IMGT.c.a + | 159 dat$FR2.IMGT.c.g + |
| 182 dat$FR2.IMGT.a.t + | 160 dat$FR2.IMGT.g.t + |
| 183 dat$FR2.IMGT.t.a + | 161 dat$FR2.IMGT.t.g + |
| 184 dat$FR2.IMGT.g.c + | 162 dat$CDR2.IMGT.a.c + |
| 185 dat$FR2.IMGT.c.g + | 163 dat$CDR2.IMGT.c.a + |
| 186 dat$FR2.IMGT.g.t + | 164 dat$CDR2.IMGT.a.t + |
| 187 dat$FR2.IMGT.t.g + | 165 dat$CDR2.IMGT.t.a + |
| 188 dat$CDR2.IMGT.a.c + | 166 dat$CDR2.IMGT.g.c + |
| 189 dat$CDR2.IMGT.c.a + | 167 dat$CDR2.IMGT.c.g + |
| 190 dat$CDR2.IMGT.a.t + | 168 dat$CDR2.IMGT.g.t + |
| 191 dat$CDR2.IMGT.t.a + | 169 dat$CDR2.IMGT.t.g + |
| 192 dat$CDR2.IMGT.g.c + | 170 dat$FR3.IMGT.a.c + |
| 193 dat$CDR2.IMGT.c.g + | 171 dat$FR3.IMGT.c.a + |
| 194 dat$CDR2.IMGT.g.t + | 172 dat$FR3.IMGT.a.t + |
| 195 dat$CDR2.IMGT.t.g + | 173 dat$FR3.IMGT.t.a + |
| 196 dat$FR3.IMGT.a.c + | 174 dat$FR3.IMGT.g.c + |
| 197 dat$FR3.IMGT.c.a + | 175 dat$FR3.IMGT.c.g + |
| 198 dat$FR3.IMGT.a.t + | 176 dat$FR3.IMGT.g.t + |
| 199 dat$FR3.IMGT.t.a + | 177 dat$FR3.IMGT.t.g |
| 200 dat$FR3.IMGT.g.c + | 178 |
| 201 dat$FR3.IMGT.c.g + | 179 |
| 202 dat$FR3.IMGT.g.t + | 180 dat$transitionMutationsAtGC = dat$FR1.IMGT.g.a + |
| 203 dat$FR3.IMGT.t.g) | |
| 204 | |
| 205 result[3,"x"] = transversionMutations | |
| 206 result[3,"y"] = VRegionMutations | |
| 207 result[3,"z"] = round(result[3,"x"] / result[3,"y"] * 100, digits=1) | |
| 208 | |
| 209 transitionMutationsAtGC = sum(dat$FR1.IMGT.g.a + | |
| 210 dat$FR1.IMGT.c.t + | 181 dat$FR1.IMGT.c.t + |
| 211 dat$CDR1.IMGT.g.a + | 182 dat$CDR1.IMGT.g.a + |
| 212 dat$CDR1.IMGT.c.t + | 183 dat$CDR1.IMGT.c.t + |
| 213 dat$FR2.IMGT.g.a + | 184 dat$FR2.IMGT.g.a + |
| 214 dat$FR2.IMGT.c.t + | 185 dat$FR2.IMGT.c.t + |
| 215 dat$CDR2.IMGT.g.a + | 186 dat$CDR2.IMGT.g.a + |
| 216 dat$CDR2.IMGT.c.t + | 187 dat$CDR2.IMGT.c.t + |
| 217 dat$FR3.IMGT.g.a + | 188 dat$FR3.IMGT.g.a + |
| 218 dat$FR3.IMGT.c.t) | 189 dat$FR3.IMGT.c.t |
| 219 | 190 |
| 220 totalMutationsAtGC = sum(dat$FR1.IMGT.g.a + | 191 dat$totalMutationsAtGC = dat$FR1.IMGT.g.a + |
| 221 dat$FR1.IMGT.c.t + | 192 dat$FR1.IMGT.c.t + |
| 222 dat$FR1.IMGT.c.a + | 193 dat$FR1.IMGT.c.a + |
| 223 dat$FR1.IMGT.g.c + | 194 dat$FR1.IMGT.g.c + |
| 224 dat$FR1.IMGT.c.g + | 195 dat$FR1.IMGT.c.g + |
| 225 dat$FR1.IMGT.g.t + | 196 dat$FR1.IMGT.g.t + |
| 244 dat$FR3.IMGT.g.a + | 215 dat$FR3.IMGT.g.a + |
| 245 dat$FR3.IMGT.c.t + | 216 dat$FR3.IMGT.c.t + |
| 246 dat$FR3.IMGT.c.a + | 217 dat$FR3.IMGT.c.a + |
| 247 dat$FR3.IMGT.g.c + | 218 dat$FR3.IMGT.g.c + |
| 248 dat$FR3.IMGT.c.g + | 219 dat$FR3.IMGT.c.g + |
| 249 dat$FR3.IMGT.g.t) | 220 dat$FR3.IMGT.g.t |
| 250 | 221 |
| 251 result[4,"x"] = transitionMutationsAtGC | 222 |
| 252 result[4,"y"] = totalMutationsAtGC | 223 |
| 253 result[4,"z"] = round(result[4,"x"] / result[4,"y"] * 100, digits=1) | 224 setwd(outputdir) |
| 254 | 225 |
| 255 result[5,"x"] = totalMutationsAtGC | 226 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=5) |
| 256 result[5,"y"] = VRegionMutations | 227 for(i in 1:length(genes)){ |
| 257 result[5,"z"] = round(result[5,"x"] / result[5,"y"] * 100, digits=1) | 228 gene = genes[i] |
| 258 | 229 tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),] |
| 259 | 230 if(gene == "."){ |
| 260 #transitiontable | 231 tmp = dat |
| 232 } | |
| 233 if(length(tmp) == 0){ | |
| 234 cat("0", file=paste(gene, "_value.txt" ,sep="")) | |
| 235 next | |
| 236 } | |
| 237 j = i - 1 | |
| 238 x = (j * 3) + 1 | |
| 239 y = (j * 3) + 2 | |
| 240 z = (j * 3) + 3 | |
| 241 matrx[1,x] = sum(tmp$VRegionMutations) | |
| 242 matrx[1,y] = sum(tmp$VRegionNucleotides) | |
| 243 matrx[1,z] = round(matrx[1,x] / matrx[1,y] * 100, digits=1) | |
| 244 matrx[2,x] = sum(tmp$transitionMutations) | |
| 245 matrx[2,y] = sum(tmp$VRegionMutations) | |
| 246 matrx[2,z] = round(matrx[2,x] / matrx[2,y] * 100, digits=1) | |
| 247 matrx[3,x] = sum(tmp$transversionMutations) | |
| 248 matrx[3,y] = sum(tmp$VRegionMutations) | |
| 249 matrx[3,z] = round(matrx[3,x] / matrx[3,y] * 100, digits=1) | |
| 250 matrx[4,x] = sum(tmp$transitionMutationsAtGC) | |
| 251 matrx[4,y] = sum(tmp$totalMutationsAtGC) | |
| 252 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) | |
| 253 matrx[5,x] = sum(tmp$totalMutationsAtGC) | |
| 254 matrx[5,y] = sum(tmp$VRegionMutations) | |
| 255 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) | |
| 256 | |
| 257 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) | |
| 258 row.names(transitionTable) = c("A", "C", "G", "T") | |
| 259 transitionTable["A","A"] = NA | |
| 260 transitionTable["C","C"] = NA | |
| 261 transitionTable["G","G"] = NA | |
| 262 transitionTable["T","T"] = NA | |
| 263 nts = c("a", "c", "g", "t") | |
| 264 | |
| 265 | |
| 266 for(nt1 in nts){ | |
| 267 for(nt2 in nts){ | |
| 268 if(nt1 == nt2){ | |
| 269 next | |
| 270 } | |
| 271 NT1 = LETTERS[letters == nt1] | |
| 272 NT2 = LETTERS[letters == nt2] | |
| 273 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="") | |
| 274 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") | |
| 275 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") | |
| 276 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") | |
| 277 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") | |
| 278 transitionTable[NT1,NT2] = sum( tmp[,FR1] + | |
| 279 tmp[,CDR1] + | |
| 280 tmp[,FR2] + | |
| 281 tmp[,CDR2] + | |
| 282 tmp[,FR3]) | |
| 283 } | |
| 284 } | |
| 285 write.table(x=transitionTable, file=paste("transitions_", gene ,".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA) | |
| 286 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", gene ,".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) | |
| 287 | |
| 288 cat(matrx[1,x], file=paste(gene, "_value.txt" ,sep="")) | |
| 289 cat(length(tmp$Sequence.ID), file=paste(gene, "_n.txt" ,sep="")) | |
| 290 } | |
| 291 | |
| 292 #again for all of the data | |
| 293 tmp = dat | |
| 294 j = i | |
| 295 x = (j * 3) + 1 | |
| 296 y = (j * 3) + 2 | |
| 297 z = (j * 3) + 3 | |
| 298 matrx[1,x] = sum(tmp$VRegionMutations) | |
| 299 matrx[1,y] = sum(tmp$VRegionNucleotides) | |
| 300 matrx[1,z] = round(matrx[1,x] / matrx[1,y] * 100, digits=1) | |
| 301 matrx[2,x] = sum(tmp$transitionMutations) | |
| 302 matrx[2,y] = sum(tmp$VRegionMutations) | |
| 303 matrx[2,z] = round(matrx[2,x] / matrx[2,y] * 100, digits=1) | |
| 304 matrx[3,x] = sum(tmp$transversionMutations) | |
| 305 matrx[3,y] = sum(tmp$VRegionMutations) | |
| 306 matrx[3,z] = round(matrx[3,x] / matrx[3,y] * 100, digits=1) | |
| 307 matrx[4,x] = sum(tmp$transitionMutationsAtGC) | |
| 308 matrx[4,y] = sum(tmp$totalMutationsAtGC) | |
| 309 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1) | |
| 310 matrx[5,x] = sum(tmp$totalMutationsAtGC) | |
| 311 matrx[5,y] = sum(tmp$VRegionMutations) | |
| 312 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1) | |
| 261 | 313 |
| 262 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) | 314 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4) |
| 263 row.names(transitionTable) = c("A", "C", "G", "T") | 315 row.names(transitionTable) = c("A", "C", "G", "T") |
| 264 transitionTable["A","A"] = NA | 316 transitionTable["A","A"] = NA |
| 265 transitionTable["C","C"] = NA | 317 transitionTable["C","C"] = NA |
| 267 transitionTable["T","T"] = NA | 319 transitionTable["T","T"] = NA |
| 268 nts = c("a", "c", "g", "t") | 320 nts = c("a", "c", "g", "t") |
| 269 | 321 |
| 270 | 322 |
| 271 for(nt1 in nts){ | 323 for(nt1 in nts){ |
| 272 for(nt2 in nts){ | 324 for(nt2 in nts){ |
| 273 if(nt1 == nt2){ | 325 if(nt1 == nt2){ |
| 274 next | 326 next |
| 275 } | 327 } |
| 276 NT1 = LETTERS[letters == nt1] | 328 NT1 = LETTERS[letters == nt1] |
| 277 NT2 = LETTERS[letters == nt2] | 329 NT2 = LETTERS[letters == nt2] |
| 278 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="") | 330 FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="") |
| 279 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") | 331 CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="") |
| 280 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") | 332 FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="") |
| 281 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") | 333 CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="") |
| 282 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") | 334 FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="") |
| 283 transitionTable[NT1,NT2] = sum( dat[,FR1] + | 335 transitionTable[NT1,NT2] = sum( tmp[,FR1] + |
| 284 dat[,CDR1] + | 336 tmp[,CDR1] + |
| 285 dat[,FR2] + | 337 tmp[,FR2] + |
| 286 dat[,CDR2] + | 338 tmp[,CDR2] + |
| 287 dat[,FR3]) | 339 tmp[,FR3]) |
| 288 } | 340 } |
| 289 } | 341 } |
| 290 | 342 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) |
| 291 setwd(outputdir) | 343 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file="matched_all.txt", sep="\t",quote=F,row.names=F,col.names=T) |
| 344 cat(matrx[1,x], file="total_value.txt") | |
| 345 cat(length(tmp$Sequence.ID), file="total_n.txt") | |
| 346 | |
| 347 | |
| 348 | |
| 349 result = data.frame(matrx) | |
| 350 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C.G (%)") | |
| 351 | |
| 292 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) | 352 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F) |
| 293 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA) | 353 |
| 294 cat(length(dat$Sequence.ID), file="n.txt") | 354 |
| 295 | 355 if (!("ggplot2" %in% rownames(installed.packages()))) { |
| 296 | 356 install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") |
| 297 weblogo = dat[,c("Sequence.ID", "VGene")] | 357 } |
| 298 weblogo$VGene = gsub("\\*.*", "", weblogo$VGene) | 358 library(ggplot2) |
| 299 | 359 |
| 300 rs12 = read.table("HS12RSS.txt", sep="\t", header=TRUE) | 360 genesForPlot = gsub("[0-9]", "", dat$best_match) |
| 301 rs23 = read.table("HS23RSS.txt", sep="\t", header=TRUE) | 361 genesForPlot = data.frame(table(genesForPlot)) |
| 302 | 362 colnames(genesForPlot) = c("Gene","Freq") |
| 303 result12 = merge(weblogo, rs12, by.x="VGene", by.y="Gene") | 363 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) |
| 304 result23 = merge(weblogo, rs23, by.x="VGene", by.y="Gene") | 364 |
| 305 | 365 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) |
| 306 write.table(x=result12$Sequence, file="weblogo_in_rs12.txt", sep=",",quote=F,row.names=F,col.names=F) | 366 pc = pc + geom_bar(width = 1, stat = "identity") |
| 307 write.table(x=result23$Sequence, file="weblogo_in_rs23.txt", sep=",",quote=F,row.names=F,col.names=F) | 367 pc = pc + coord_polar(theta="y") |
| 308 | 368 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA", "( n =", sum(genesForPlot$Freq), ")")) |
| 369 | |
| 370 png(filename="all.png") | |
| 371 pc | |
| 372 dev.off() | |
| 373 | |
| 374 | |
| 375 #blegh | |
| 376 genesForPlot = dat[grepl("ca", dat$best_match),]$best_match | |
| 377 if(length(genesForPlot) > 0){ | |
| 378 genesForPlot = data.frame(table(genesForPlot)) | |
| 379 colnames(genesForPlot) = c("Gene","Freq") | |
| 380 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) | |
| 381 | |
| 382 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) | |
| 383 pc = pc + geom_bar(width = 1, stat = "identity") | |
| 384 pc = pc + coord_polar(theta="y") | |
| 385 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA", "( n =", sum(genesForPlot$Freq), ")")) | |
| 386 | |
| 387 | |
| 388 png(filename="ca.png") | |
| 389 print(pc) | |
| 390 dev.off() | |
| 391 } | |
| 392 | |
| 393 genesForPlot = dat[grepl("cg", dat$best_match),]$best_match | |
| 394 if(length(genesForPlot) > 0){ | |
| 395 genesForPlot = data.frame(table(genesForPlot)) | |
| 396 colnames(genesForPlot) = c("Gene","Freq") | |
| 397 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq) | |
| 398 | |
| 399 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label)) | |
| 400 pc = pc + geom_bar(width = 1, stat = "identity") | |
| 401 pc = pc + coord_polar(theta="y") | |
| 402 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgG", "( n =", sum(genesForPlot$Freq), ")")) | |
| 403 | |
| 404 | |
| 405 png(filename="cg.png") | |
| 406 print(pc) | |
| 407 dev.off() | |
| 408 } |
