| 
0
 | 
     1 args <- commandArgs(trailingOnly = TRUE)
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 input = args[1]
 | 
| 
4
 | 
     4 genes = unlist(strsplit(args[2], ","))
 | 
| 
0
 | 
     5 outputdir = args[3]
 | 
| 
22
 | 
     6 print(args[4])
 | 
| 
 | 
     7 include_fr1 = ifelse(args[4] == "yes", T, F)
 | 
| 
0
 | 
     8 setwd(outputdir)
 | 
| 
 | 
     9 
 | 
| 
 | 
    10 dat = read.table(input, header=T, sep="\t", fill=T, stringsAsFactors=F)
 | 
| 
 | 
    11 
 | 
| 
 | 
    12 if(length(dat$Sequence.ID) == 0){
 | 
| 
4
 | 
    13   setwd(outputdir)
 | 
| 
 | 
    14   result = data.frame(x = rep(0, 5), y = rep(0, 5), z = rep(NA, 5))
 | 
| 
 | 
    15   row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C G (%)")
 | 
| 
 | 
    16   write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F)
 | 
| 
 | 
    17   transitionTable = data.frame(A=rep(0, 4),C=rep(0, 4),G=rep(0, 4),T=rep(0, 4))
 | 
| 
 | 
    18   row.names(transitionTable) = c("A", "C", "G", "T")
 | 
| 
 | 
    19   transitionTable["A","A"] = NA
 | 
| 
 | 
    20   transitionTable["C","C"] = NA
 | 
| 
 | 
    21   transitionTable["G","G"] = NA
 | 
| 
 | 
    22   transitionTable["T","T"] = NA
 | 
| 
 | 
    23   write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA)
 | 
| 
 | 
    24   cat("0", file="n.txt")
 | 
| 
 | 
    25   stop("No data")
 | 
| 
0
 | 
    26 }
 | 
| 
 | 
    27 
 | 
| 
 | 
    28 
 | 
| 
 | 
    29 
 | 
| 
 | 
    30 cleanup_columns = c("FR1.IMGT.c.a",
 | 
| 
 | 
    31                     "FR2.IMGT.g.t",
 | 
| 
 | 
    32                     "CDR1.IMGT.Nb.of.nucleotides",
 | 
| 
 | 
    33                     "CDR2.IMGT.t.a",
 | 
| 
 | 
    34                     "FR1.IMGT.c.g",
 | 
| 
 | 
    35                     "CDR1.IMGT.c.t",
 | 
| 
 | 
    36                     "FR2.IMGT.a.c",
 | 
| 
 | 
    37                     "FR2.IMGT.Nb.of.mutations",
 | 
| 
 | 
    38                     "FR2.IMGT.g.c",
 | 
| 
 | 
    39                     "FR2.IMGT.a.g",
 | 
| 
 | 
    40                     "FR3.IMGT.t.a",
 | 
| 
 | 
    41                     "FR3.IMGT.t.c",
 | 
| 
 | 
    42                     "FR2.IMGT.g.a",
 | 
| 
 | 
    43                     "FR3.IMGT.c.g",
 | 
| 
 | 
    44                     "FR1.IMGT.Nb.of.mutations",
 | 
| 
 | 
    45                     "CDR1.IMGT.g.a",
 | 
| 
 | 
    46                     "CDR1.IMGT.t.g",
 | 
| 
 | 
    47                     "CDR1.IMGT.g.c",
 | 
| 
 | 
    48                     "CDR2.IMGT.Nb.of.nucleotides",
 | 
| 
 | 
    49                     "FR2.IMGT.a.t",
 | 
| 
 | 
    50                     "CDR1.IMGT.Nb.of.mutations",
 | 
| 
 | 
    51                     "CDR1.IMGT.a.g",
 | 
| 
 | 
    52                     "FR3.IMGT.a.c",
 | 
| 
 | 
    53                     "FR1.IMGT.g.a",
 | 
| 
 | 
    54                     "FR3.IMGT.a.g",
 | 
| 
 | 
    55                     "FR1.IMGT.a.t",
 | 
| 
 | 
    56                     "CDR2.IMGT.a.g",
 | 
| 
 | 
    57                     "CDR2.IMGT.Nb.of.mutations",
 | 
| 
 | 
    58                     "CDR2.IMGT.g.t",
 | 
| 
 | 
    59                     "CDR2.IMGT.a.c",
 | 
| 
 | 
    60                     "CDR1.IMGT.t.c",
 | 
| 
 | 
    61                     "FR3.IMGT.g.c",
 | 
| 
 | 
    62                     "FR1.IMGT.g.t",
 | 
| 
 | 
    63                     "FR3.IMGT.g.t",
 | 
| 
 | 
    64                     "CDR1.IMGT.a.t",
 | 
| 
 | 
    65                     "FR1.IMGT.a.g",
 | 
| 
 | 
    66                     "FR3.IMGT.a.t",
 | 
| 
 | 
    67                     "FR3.IMGT.Nb.of.nucleotides",
 | 
| 
 | 
    68                     "FR2.IMGT.t.c",
 | 
| 
 | 
    69                     "CDR2.IMGT.g.a",
 | 
| 
 | 
    70                     "FR2.IMGT.t.a",
 | 
| 
 | 
    71                     "CDR1.IMGT.t.a",
 | 
| 
 | 
    72                     "FR2.IMGT.t.g",
 | 
| 
 | 
    73                     "FR3.IMGT.t.g",
 | 
| 
 | 
    74                     "FR2.IMGT.Nb.of.nucleotides",
 | 
| 
 | 
    75                     "FR1.IMGT.t.a",
 | 
| 
 | 
    76                     "FR1.IMGT.t.g",
 | 
| 
 | 
    77                     "FR3.IMGT.c.t",
 | 
| 
 | 
    78                     "FR1.IMGT.t.c",
 | 
| 
 | 
    79                     "CDR2.IMGT.a.t",
 | 
| 
 | 
    80                     "FR2.IMGT.c.t",
 | 
| 
 | 
    81                     "CDR1.IMGT.g.t",
 | 
| 
 | 
    82                     "CDR2.IMGT.t.g",
 | 
| 
 | 
    83                     "FR1.IMGT.Nb.of.nucleotides",
 | 
| 
 | 
    84                     "CDR1.IMGT.c.g",
 | 
| 
 | 
    85                     "CDR2.IMGT.t.c",
 | 
| 
 | 
    86                     "FR3.IMGT.g.a",
 | 
| 
 | 
    87                     "CDR1.IMGT.a.c",
 | 
| 
 | 
    88                     "FR2.IMGT.c.a",
 | 
| 
 | 
    89                     "FR3.IMGT.Nb.of.mutations",
 | 
| 
 | 
    90                     "FR2.IMGT.c.g",
 | 
| 
 | 
    91                     "CDR2.IMGT.g.c",
 | 
| 
 | 
    92                     "FR1.IMGT.g.c",
 | 
| 
 | 
    93                     "CDR2.IMGT.c.t",
 | 
| 
 | 
    94                     "FR3.IMGT.c.a",
 | 
| 
 | 
    95                     "CDR1.IMGT.c.a",
 | 
| 
 | 
    96                     "CDR2.IMGT.c.g",
 | 
| 
 | 
    97                     "CDR2.IMGT.c.a",
 | 
| 
42
 | 
    98                     "FR1.IMGT.c.t",
 | 
| 
 | 
    99                     "FR1.IMGT.Nb.of.silent.mutations",
 | 
| 
 | 
   100                     "FR2.IMGT.Nb.of.silent.mutations",
 | 
| 
 | 
   101                     "FR3.IMGT.Nb.of.silent.mutations",
 | 
| 
 | 
   102                     "FR1.IMGT.Nb.of.nonsilent.mutations",
 | 
| 
 | 
   103                     "FR2.IMGT.Nb.of.nonsilent.mutations",
 | 
| 
 | 
   104                     "FR3.IMGT.Nb.of.nonsilent.mutations")
 | 
| 
0
 | 
   105 
 | 
| 
 | 
   106 for(col in cleanup_columns){
 | 
| 
 | 
   107   dat[,col] = gsub("\\(.*\\)", "", dat[,col])
 | 
| 
 | 
   108   #dat[dat[,col] == "",] = "0"
 | 
| 
 | 
   109   dat[,col] = as.numeric(dat[,col])
 | 
| 
 | 
   110   dat[is.na(dat[,col]),] = 0
 | 
| 
 | 
   111 }
 | 
| 
 | 
   112 
 | 
| 
22
 | 
   113 regions = c("FR1", "CDR1", "FR2", "CDR2", "FR3")
 | 
| 
 | 
   114 if(!include_fr1){
 | 
| 
 | 
   115 	regions = c("CDR1", "FR2", "CDR2", "FR3")
 | 
| 
 | 
   116 }
 | 
| 
0
 | 
   117 
 | 
| 
22
 | 
   118 sum_by_row = function(x, columns) { sum(as.numeric(x[columns]), na.rm=T) }
 | 
| 
0
 | 
   119 
 | 
| 
22
 | 
   120 VRegionMutations_columns = paste(regions, ".IMGT.Nb.of.mutations", sep="")
 | 
| 
 | 
   121 dat$VRegionMutations =  apply(dat, FUN=sum_by_row, 1, columns=VRegionMutations_columns)
 | 
| 
 | 
   122 
 | 
| 
 | 
   123 VRegionNucleotides_columns = paste(regions, ".IMGT.Nb.of.nucleotides", sep="")
 | 
| 
 | 
   124 dat$VRegionNucleotides =  apply(dat, FUN=sum_by_row, 1, columns=VRegionNucleotides_columns)
 | 
| 
 | 
   125 
 | 
| 
 | 
   126 transitionMutations_columns = paste(rep(regions, each=4), c(".IMGT.a.g", ".IMGT.g.a", ".IMGT.c.t", ".IMGT.t.c"), sep="")
 | 
| 
 | 
   127 dat$transitionMutations = apply(dat, FUN=sum_by_row, 1, columns=transitionMutations_columns)
 | 
| 
 | 
   128 
 | 
| 
 | 
   129 transversionMutations_columns = paste(rep(regions, each=8), c(".IMGT.a.c",".IMGT.c.a",".IMGT.a.t",".IMGT.t.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t",".IMGT.t.g"), sep="")
 | 
| 
 | 
   130 dat$transversionMutations = apply(dat, FUN=sum_by_row, 1, columns=transversionMutations_columns)
 | 
| 
0
 | 
   131 
 | 
| 
 | 
   132 
 | 
| 
22
 | 
   133 transitionMutationsAtGC_columns = paste(rep(regions, each=2), c(".IMGT.g.a",".IMGT.c.t"), sep="")
 | 
| 
 | 
   134 dat$transitionMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=transitionMutationsAtGC_columns)
 | 
| 
0
 | 
   135 
 | 
| 
22
 | 
   136 totalMutationsAtGC_columns = paste(rep(regions, each=6), c(".IMGT.g.a",".IMGT.c.t",".IMGT.c.a",".IMGT.g.c",".IMGT.c.g",".IMGT.g.t"), sep="")
 | 
| 
 | 
   137 dat$totalMutationsAtGC = apply(dat, FUN=sum_by_row, 1, columns=totalMutationsAtGC_columns)
 | 
| 
0
 | 
   138 
 | 
| 
24
 | 
   139 FRRegions = regions[grepl("FR", regions)]
 | 
| 
 | 
   140 CDRRegions = regions[grepl("CDR", regions)]
 | 
| 
 | 
   141 
 | 
| 
 | 
   142 FR_silentMutations_columns = paste(FRRegions, ".IMGT.Nb.of.silent.mutations", sep="")
 | 
| 
 | 
   143 dat$silentMutationsFR = apply(dat, FUN=sum_by_row, 1, columns=FR_silentMutations_columns)
 | 
| 
23
 | 
   144 
 | 
| 
24
 | 
   145 CDR_silentMutations_columns = paste(CDRRegions, ".IMGT.Nb.of.silent.mutations", sep="")
 | 
| 
 | 
   146 dat$silentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_silentMutations_columns)
 | 
| 
 | 
   147 
 | 
| 
 | 
   148 FR_nonSilentMutations_columns = paste(FRRegions, ".IMGT.Nb.of.nonsilent.mutations", sep="")
 | 
| 
 | 
   149 dat$nonSilentMutationsFR = apply(dat, FUN=sum_by_row, 1, columns=FR_nonSilentMutations_columns)
 | 
| 
 | 
   150 
 | 
| 
 | 
   151 CDR_nonSilentMutations_columns = paste(CDRRegions, ".IMGT.Nb.of.nonsilent.mutations", sep="")
 | 
| 
 | 
   152 dat$nonSilentMutationsCDR = apply(dat, FUN=sum_by_row, 1, columns=CDR_nonSilentMutations_columns)
 | 
| 
0
 | 
   153 
 | 
| 
40
 | 
   154 mutation.sum.columns = c("Sequence.ID", "VRegionMutations", "VRegionNucleotides", "transitionMutations", "transversionMutations", "transitionMutationsAtGC", "silentMutationsFR", "nonSilentMutationsFR", "silentMutationsCDR", "nonSilentMutationsCDR")
 | 
| 
 | 
   155 
 | 
| 
 | 
   156 write.table(dat[,mutation.sum.columns], "mutation_by_id.txt", sep="\t",quote=F,row.names=F,col.names=T)
 | 
| 
0
 | 
   157 
 | 
| 
4
 | 
   158 setwd(outputdir)
 | 
| 
 | 
   159 
 | 
| 
26
 | 
   160 nts = c("a", "c", "g", "t")
 | 
| 
22
 | 
   161 zeros=rep(0, 4)
 | 
| 
24
 | 
   162 matrx = matrix(data = 0, ncol=((length(genes) + 1) * 3),nrow=7)
 | 
| 
4
 | 
   163 for(i in 1:length(genes)){
 | 
| 
 | 
   164   gene = genes[i]
 | 
| 
 | 
   165   tmp = dat[grepl(paste(".*", gene, ".*", sep=""), dat$best_match),]
 | 
| 
 | 
   166   if(gene == "."){
 | 
| 
 | 
   167     tmp = dat
 | 
| 
 | 
   168   }
 | 
| 
 | 
   169   j = i - 1
 | 
| 
 | 
   170   x = (j * 3) + 1
 | 
| 
 | 
   171   y = (j * 3) + 2
 | 
| 
 | 
   172   z = (j * 3) + 3
 | 
| 
 | 
   173   matrx[1,x] = sum(tmp$VRegionMutations)
 | 
| 
 | 
   174   matrx[1,y] = sum(tmp$VRegionNucleotides)
 | 
| 
 | 
   175   matrx[1,z] = round(matrx[1,x] / matrx[1,y] * 100, digits=1)
 | 
| 
 | 
   176   matrx[2,x] = sum(tmp$transitionMutations)
 | 
| 
 | 
   177   matrx[2,y] = sum(tmp$VRegionMutations)
 | 
| 
 | 
   178   matrx[2,z] = round(matrx[2,x] / matrx[2,y] * 100, digits=1)
 | 
| 
 | 
   179   matrx[3,x] = sum(tmp$transversionMutations)
 | 
| 
 | 
   180   matrx[3,y] = sum(tmp$VRegionMutations)
 | 
| 
 | 
   181   matrx[3,z] = round(matrx[3,x] / matrx[3,y] * 100, digits=1)
 | 
| 
 | 
   182   matrx[4,x] = sum(tmp$transitionMutationsAtGC)
 | 
| 
 | 
   183   matrx[4,y] = sum(tmp$totalMutationsAtGC)
 | 
| 
 | 
   184   matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1)
 | 
| 
 | 
   185   matrx[5,x] = sum(tmp$totalMutationsAtGC)
 | 
| 
 | 
   186   matrx[5,y] = sum(tmp$VRegionMutations)
 | 
| 
 | 
   187   matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1)
 | 
| 
25
 | 
   188   matrx[6,x] = sum(tmp$nonSilentMutationsFR)
 | 
| 
 | 
   189   matrx[6,y] = sum(tmp$silentMutationsFR)
 | 
| 
 | 
   190   matrx[6,z] = round(matrx[6,x] / matrx[6,y], digits=1)
 | 
| 
 | 
   191   matrx[7,x] = sum(tmp$nonSilentMutationsCDR)
 | 
| 
 | 
   192   matrx[7,y] = sum(tmp$silentMutationsCDR)
 | 
| 
 | 
   193   matrx[7,z] = round(matrx[7,x] / matrx[7,y], digits=1)
 | 
| 
23
 | 
   194   
 | 
| 
4
 | 
   195   
 | 
| 
22
 | 
   196   transitionTable = data.frame(A=zeros,C=zeros,G=zeros,T=zeros)
 | 
| 
4
 | 
   197   row.names(transitionTable) = c("A", "C", "G", "T")
 | 
| 
 | 
   198   transitionTable["A","A"] = NA
 | 
| 
 | 
   199   transitionTable["C","C"] = NA
 | 
| 
 | 
   200   transitionTable["G","G"] = NA
 | 
| 
 | 
   201   transitionTable["T","T"] = NA
 | 
| 
22
 | 
   202 
 | 
| 
 | 
   203   if(nrow(tmp) > 0){
 | 
| 
 | 
   204 		for(nt1 in nts){
 | 
| 
 | 
   205 			for(nt2 in nts){
 | 
| 
 | 
   206 				if(nt1 == nt2){
 | 
| 
 | 
   207 					next
 | 
| 
 | 
   208 				}
 | 
| 
 | 
   209 				NT1 = LETTERS[letters == nt1]
 | 
| 
 | 
   210 				NT2 = LETTERS[letters == nt2]
 | 
| 
 | 
   211 				FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="")
 | 
| 
 | 
   212 				CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="")
 | 
| 
 | 
   213 				FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="")
 | 
| 
 | 
   214 				CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="")
 | 
| 
 | 
   215 				FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="")
 | 
| 
 | 
   216 				if(include_fr1){
 | 
| 
 | 
   217 					transitionTable[NT1,NT2] = sum(tmp[,c(FR1, CDR1, FR2, CDR2, FR3)])
 | 
| 
 | 
   218 				} else {
 | 
| 
 | 
   219 					transitionTable[NT1,NT2] = sum(tmp[,c(CDR1, FR2, CDR2, FR3)])
 | 
| 
 | 
   220 				}
 | 
| 
 | 
   221 			}
 | 
| 
 | 
   222 		}
 | 
| 
 | 
   223   }
 | 
| 
4
 | 
   224   
 | 
| 
 | 
   225   
 | 
| 
 | 
   226   write.table(x=transitionTable, file=paste("transitions_", gene ,".txt", sep=""), sep=",",quote=F,row.names=T,col.names=NA)
 | 
| 
 | 
   227   write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file=paste("matched_", gene ,".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T)
 | 
| 
 | 
   228   
 | 
| 
 | 
   229   cat(matrx[1,x], file=paste(gene, "_value.txt" ,sep=""))
 | 
| 
 | 
   230   cat(length(tmp$Sequence.ID), file=paste(gene, "_n.txt" ,sep=""))
 | 
| 
 | 
   231 }
 | 
| 
 | 
   232 
 | 
| 
 | 
   233 #again for all of the data
 | 
| 
 | 
   234 tmp = dat
 | 
| 
 | 
   235 j = i
 | 
| 
 | 
   236 x = (j * 3) + 1
 | 
| 
 | 
   237 y = (j * 3) + 2
 | 
| 
 | 
   238 z = (j * 3) + 3
 | 
| 
 | 
   239 matrx[1,x] = sum(tmp$VRegionMutations)
 | 
| 
 | 
   240 matrx[1,y] = sum(tmp$VRegionNucleotides)
 | 
| 
 | 
   241 matrx[1,z] = round(matrx[1,x] / matrx[1,y] * 100, digits=1)
 | 
| 
 | 
   242 matrx[2,x] = sum(tmp$transitionMutations)
 | 
| 
 | 
   243 matrx[2,y] = sum(tmp$VRegionMutations)
 | 
| 
 | 
   244 matrx[2,z] = round(matrx[2,x] / matrx[2,y] * 100, digits=1)
 | 
| 
 | 
   245 matrx[3,x] = sum(tmp$transversionMutations)
 | 
| 
 | 
   246 matrx[3,y] = sum(tmp$VRegionMutations)
 | 
| 
 | 
   247 matrx[3,z] = round(matrx[3,x] / matrx[3,y] * 100, digits=1)
 | 
| 
 | 
   248 matrx[4,x] = sum(tmp$transitionMutationsAtGC)
 | 
| 
 | 
   249 matrx[4,y] = sum(tmp$totalMutationsAtGC)
 | 
| 
 | 
   250 matrx[4,z] = round(matrx[4,x] / matrx[4,y] * 100, digits=1)
 | 
| 
 | 
   251 matrx[5,x] = sum(tmp$totalMutationsAtGC)
 | 
| 
 | 
   252 matrx[5,y] = sum(tmp$VRegionMutations)
 | 
| 
 | 
   253 matrx[5,z] = round(matrx[5,x] / matrx[5,y] * 100, digits=1)
 | 
| 
25
 | 
   254 matrx[6,x] = sum(tmp$nonSilentMutationsFR)
 | 
| 
 | 
   255 matrx[6,y] = sum(tmp$silentMutationsFR)
 | 
| 
 | 
   256 matrx[6,z] = round(matrx[6,x] / matrx[6,y], digits=1)
 | 
| 
 | 
   257 matrx[7,x] = sum(tmp$nonSilentMutationsCDR)
 | 
| 
 | 
   258 matrx[7,y] = sum(tmp$silentMutationsCDR)
 | 
| 
 | 
   259 matrx[7,z] = round(matrx[7,x] / matrx[7,y], digits=1)
 | 
| 
0
 | 
   260 
 | 
| 
 | 
   261 transitionTable = data.frame(A=1:4,C=1:4,G=1:4,T=1:4)
 | 
| 
 | 
   262 row.names(transitionTable) = c("A", "C", "G", "T")
 | 
| 
 | 
   263 transitionTable["A","A"] = NA
 | 
| 
 | 
   264 transitionTable["C","C"] = NA
 | 
| 
 | 
   265 transitionTable["G","G"] = NA
 | 
| 
 | 
   266 transitionTable["T","T"] = NA
 | 
| 
 | 
   267 
 | 
| 
 | 
   268 
 | 
| 
 | 
   269 for(nt1 in nts){
 | 
| 
4
 | 
   270 	for(nt2 in nts){
 | 
| 
 | 
   271 		if(nt1 == nt2){
 | 
| 
 | 
   272 			next
 | 
| 
 | 
   273 		}
 | 
| 
 | 
   274 		NT1 = LETTERS[letters == nt1]
 | 
| 
 | 
   275 		NT2 = LETTERS[letters == nt2]
 | 
| 
 | 
   276 		FR1 = paste("FR1.IMGT.", nt1, ".", nt2, sep="")
 | 
| 
 | 
   277 		CDR1 = paste("CDR1.IMGT.", nt1, ".", nt2, sep="")
 | 
| 
 | 
   278 		FR2 = paste("FR2.IMGT.", nt1, ".", nt2, sep="")
 | 
| 
 | 
   279 		CDR2 = paste("CDR2.IMGT.", nt1, ".", nt2, sep="")
 | 
| 
 | 
   280 		FR3 = paste("FR3.IMGT.", nt1, ".", nt2, sep="")
 | 
| 
22
 | 
   281 		if(include_fr1){
 | 
| 
 | 
   282 			transitionTable[NT1,NT2] = sum(tmp[,c(FR1, CDR1, FR2, CDR2, FR3)])
 | 
| 
 | 
   283 		} else {
 | 
| 
 | 
   284 			transitionTable[NT1,NT2] = sum(tmp[,c(CDR1, FR2, CDR2, FR3)])
 | 
| 
 | 
   285 		}
 | 
| 
4
 | 
   286 	}
 | 
| 
 | 
   287 }
 | 
| 
 | 
   288 write.table(x=transitionTable, file="transitions.txt", sep=",",quote=F,row.names=T,col.names=NA)
 | 
| 
 | 
   289 write.table(x=tmp[,c("Sequence.ID", "best_match", "chunk_hit_percentage", "nt_hit_percentage", "start_locations")], file="matched_all.txt", sep="\t",quote=F,row.names=F,col.names=T)
 | 
| 
 | 
   290 cat(matrx[1,x], file="total_value.txt")
 | 
| 
 | 
   291 cat(length(tmp$Sequence.ID), file="total_n.txt")
 | 
| 
 | 
   292 
 | 
| 
 | 
   293 
 | 
| 
 | 
   294 
 | 
| 
 | 
   295 result = data.frame(matrx)
 | 
| 
33
 | 
   296 row.names(result) = c("Number of Mutations (%)", "Transition (%)", "Transversions (%)", "Transitions at G C (%)", "Targeting of C.G (%)", "FR R/S (ratio)", "CDR R/S (ratio)")
 | 
| 
4
 | 
   297 
 | 
| 
 | 
   298 write.table(x=result, file="mutations.txt", sep=",",quote=F,row.names=T,col.names=F)
 | 
| 
 | 
   299 
 | 
| 
 | 
   300 
 | 
| 
 | 
   301 if (!("ggplot2" %in% rownames(installed.packages()))) {
 | 
| 
 | 
   302 	install.packages("ggplot2", repos="http://cran.xl-mirror.nl/") 
 | 
| 
 | 
   303 }
 | 
| 
 | 
   304 library(ggplot2)
 | 
| 
 | 
   305 
 | 
| 
 | 
   306 genesForPlot = gsub("[0-9]", "", dat$best_match)
 | 
| 
 | 
   307 genesForPlot = data.frame(table(genesForPlot))
 | 
| 
 | 
   308 colnames(genesForPlot) = c("Gene","Freq")
 | 
| 
 | 
   309 genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
 | 
| 
26
 | 
   310 write.table(genesForPlot, "all.txt", sep="\t",quote=F,row.names=F,col.names=T)
 | 
| 
 | 
   311 
 | 
| 
4
 | 
   312 
 | 
| 
 | 
   313 pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label))
 | 
| 
 | 
   314 pc = pc + geom_bar(width = 1, stat = "identity")
 | 
| 
 | 
   315 pc = pc + coord_polar(theta="y")
 | 
| 
26
 | 
   316 pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("Classes", "( n =", sum(genesForPlot$Freq), ")"))
 | 
| 
4
 | 
   317 
 | 
| 
 | 
   318 png(filename="all.png")
 | 
| 
 | 
   319 pc
 | 
| 
 | 
   320 dev.off()
 | 
| 
 | 
   321 
 | 
| 
 | 
   322 
 | 
| 
 | 
   323 #blegh
 | 
| 
 | 
   324 genesForPlot = dat[grepl("ca", dat$best_match),]$best_match
 | 
| 
 | 
   325 if(length(genesForPlot) > 0){
 | 
| 
 | 
   326 	genesForPlot = data.frame(table(genesForPlot))
 | 
| 
 | 
   327 	colnames(genesForPlot) = c("Gene","Freq")
 | 
| 
 | 
   328 	genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
 | 
| 
 | 
   329 
 | 
| 
 | 
   330 	pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label))
 | 
| 
 | 
   331 	pc = pc + geom_bar(width = 1, stat = "identity")
 | 
| 
 | 
   332 	pc = pc + coord_polar(theta="y")
 | 
| 
26
 | 
   333 	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgA subclasses", "( n =", sum(genesForPlot$Freq), ")"))
 | 
| 
 | 
   334 	write.table(genesForPlot, "ca.txt", sep="\t",quote=F,row.names=F,col.names=T)
 | 
| 
4
 | 
   335 
 | 
| 
 | 
   336 	png(filename="ca.png")
 | 
| 
 | 
   337 	print(pc)
 | 
| 
 | 
   338 	dev.off()
 | 
| 
0
 | 
   339 }
 | 
| 
 | 
   340 
 | 
| 
4
 | 
   341 genesForPlot = dat[grepl("cg", dat$best_match),]$best_match
 | 
| 
 | 
   342 if(length(genesForPlot) > 0){
 | 
| 
 | 
   343 	genesForPlot = data.frame(table(genesForPlot))
 | 
| 
 | 
   344 	colnames(genesForPlot) = c("Gene","Freq")
 | 
| 
 | 
   345 	genesForPlot$label = paste(genesForPlot$Gene, "-", genesForPlot$Freq)
 | 
| 
 | 
   346 
 | 
| 
 | 
   347 	pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=label))
 | 
| 
 | 
   348 	pc = pc + geom_bar(width = 1, stat = "identity")
 | 
| 
 | 
   349 	pc = pc + coord_polar(theta="y")
 | 
| 
26
 | 
   350 	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IgG subclasses", "( n =", sum(genesForPlot$Freq), ")"))
 | 
| 
 | 
   351 	write.table(genesForPlot, "cg.txt", sep="\t",quote=F,row.names=F,col.names=T)
 | 
| 
0
 | 
   352 
 | 
| 
4
 | 
   353 	png(filename="cg.png")
 | 
| 
 | 
   354 	print(pc)
 | 
| 
 | 
   355 	dev.off()
 | 
| 
 | 
   356 }
 | 
| 
22
 | 
   357 
 | 
| 
 | 
   358 dat$percentage_mutations = round(dat$VRegionMutations / dat$VRegionNucleotides * 100, 2)
 | 
| 
 | 
   359 
 | 
| 
26
 | 
   360 p = ggplot(dat, aes(best_match, percentage_mutations))
 | 
| 
23
 | 
   361 p = p + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) + geom_point(aes(colour=best_match), position="jitter")
 | 
| 
22
 | 
   362 p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot")
 | 
| 
26
 | 
   363 write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T)
 | 
| 
 | 
   364 
 | 
| 
22
 | 
   365 
 | 
| 
 | 
   366 png(filename="scatter.png")
 | 
| 
 | 
   367 print(p)
 | 
| 
 | 
   368 dev.off()
 | 
| 
 | 
   369 
 | 
| 
 | 
   370 
 | 
| 
 | 
   371 
 | 
| 
 | 
   372 
 | 
| 
 | 
   373 
 | 
| 
 | 
   374 
 | 
| 
 | 
   375 
 | 
| 
 | 
   376 
 | 
| 
 | 
   377 
 | 
| 
 | 
   378 
 | 
| 
 | 
   379 
 | 
| 
 | 
   380 
 | 
| 
 | 
   381 
 | 
| 
 | 
   382 
 | 
| 
 | 
   383 
 | 
| 
 | 
   384 
 | 
| 
 | 
   385 
 | 
| 
 | 
   386 
 | 
| 
 | 
   387 
 | 
| 
 | 
   388 
 | 
| 
 | 
   389 
 | 
| 
 | 
   390 
 | 
| 
 | 
   391 
 | 
| 
 | 
   392 
 | 
| 
 | 
   393 
 | 
| 
 | 
   394 
 | 
| 
 | 
   395 
 | 
| 
 | 
   396 
 | 
| 
 | 
   397 
 | 
| 
 | 
   398 
 | 
| 
 | 
   399 
 | 
| 
 | 
   400 
 |