Mercurial > repos > davidvanzessen > mutation_analysis
comparison sequence_overview.r @ 82:564c4f6da203 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Tue, 24 May 2016 03:52:14 -0400 |
| parents | a778156dad3d |
| children | 0011a0597685 |
comparison
equal
deleted
inserted
replaced
| 81:a778156dad3d | 82:564c4f6da203 |
|---|---|
| 2 | 2 |
| 3 args <- commandArgs(trailingOnly = TRUE) | 3 args <- commandArgs(trailingOnly = TRUE) |
| 4 | 4 |
| 5 gene.matches = args[1] | 5 gene.matches = args[1] |
| 6 sequence.file = args[2] | 6 sequence.file = args[2] |
| 7 outputdir = args[3] | 7 merged.file = args[3] |
| 8 outputdir = args[4] | |
| 9 gene.classes = unlist(strsplit(args[5], ",")) | |
| 10 hotspot.analysis.sum.file = args[6] | |
| 8 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") | 11 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") |
| 9 NTsum.file = paste(outputdir, "ntsum.txt", sep="/") | 12 NTsum.file = paste(outputdir, "ntsum.txt", sep="/") |
| 10 main.html = "index.html" | 13 main.html = "index.html" |
| 11 | 14 |
| 12 setwd(outputdir) | 15 setwd(outputdir) |
| 13 | 16 |
| 14 genes = read.table(gene.matches, header=T, sep="\t", fill=T) | 17 genes = read.table(gene.matches, header=T, sep="\t", fill=T) |
| 15 sequences = read.table(sequence.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") | 18 sequences = read.table(sequence.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") |
| 19 merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") | |
| 20 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") | |
| 16 | 21 |
| 17 dat = merge(sequences, genes, by="Sequence.ID") | 22 dat = merge(sequences, genes, by="Sequence.ID") |
| 23 | |
| 24 dat = dat[dat$Sequence.ID %in% merged$Sequence.ID,] | |
| 18 | 25 |
| 19 dat$seq_conc = paste(dat$CDR1.IMGT, dat$CDR2.IMGT, dat$CDR3.IMGT, dat$FR2.IMGT, dat$FR3.IMGT) | 26 dat$seq_conc = paste(dat$CDR1.IMGT, dat$CDR2.IMGT, dat$CDR3.IMGT, dat$FR2.IMGT, dat$FR3.IMGT) |
| 20 | 27 |
| 21 IDs = dat[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] | 28 IDs = dat[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] |
| 22 IDs$best_match = as.character(IDs$best_match) | 29 IDs$best_match = as.character(IDs$best_match) |
| 23 | 30 |
| 24 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")]) | 31 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")]) |
| 25 | 32 |
| 26 dat = data.frame(table(dat$seq_conc)) | 33 dat = data.frame(table(dat$seq_conc, dat$Functionality)) |
| 27 | 34 |
| 28 dat = dat[dat$Freq > 1,] | 35 dat = dat[dat$Freq > 1,] |
| 29 | 36 |
| 30 names(dat) = c("seq_conc", "Freq") | 37 names(dat) = c("seq_conc", "Functionality", "Freq") |
| 31 | 38 |
| 32 dat$seq_conc = factor(dat$seq_conc) | 39 dat$seq_conc = factor(dat$seq_conc) |
| 33 | 40 |
| 34 dat = dat[order(nchar(as.character(dat$seq_conc))),] | 41 dat = dat[order(as.character(dat$seq_conc)),] |
| 35 | 42 |
| 36 #writing html from R... | 43 #writing html from R... |
| 37 td = function(val) { paste("<td>", val, "</td>", sep="") } | 44 td = function(val) { paste("<td>", val, "</td>", sep="") } |
| 38 tr = function(val) { capture.output(cat("<tr>", td(val), "</tr>", sep="")) } | 45 tr = function(val) { capture.output(cat("<tr>", td(val), "</tr>", sep="")) } |
| 39 make.link = function(id, clss, val) { paste("<a href='", clss, "_", id, ".html'>", val, "</a>", sep="") } | 46 make.link = function(id, clss, val) { paste("<a href='", clss, "_", id, ".html'>", val, "</a>", sep="") } |
| 52 cg3 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & IDs$best_match == "cg3",] | 59 cg3 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & IDs$best_match == "cg3",] |
| 53 cg4 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & IDs$best_match == "cg4",] | 60 cg4 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & IDs$best_match == "cg4",] |
| 54 | 61 |
| 55 cm = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & IDs$best_match == "cm",] | 62 cm = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & IDs$best_match == "cm",] |
| 56 | 63 |
| 64 classes = c(nrow(ca1), nrow(ca2), nrow(cg1), nrow(cg2), nrow(cg3), nrow(cg4), nrow(cm)) | |
| 65 | |
| 66 classes.sum = sum(classes) | |
| 67 | |
| 68 if(!any(classes != 0 & classes != classes.sum)){ | |
| 69 next | |
| 70 } | |
| 71 | |
| 57 id = as.numeric(dat[i,"seq_conc"]) | 72 id = as.numeric(dat[i,"seq_conc"]) |
| 58 | 73 |
| 59 functionality = dat[i,"Functionality"] | 74 functionality = dat[i,"Functionality"] |
| 60 | 75 |
| 61 if(nrow(ca1) > 0){ | 76 if(nrow(ca1) > 0){ |
| 111 NToverview = genes[,c("Sequence.ID", "best_match")] | 126 NToverview = genes[,c("Sequence.ID", "best_match")] |
| 112 sequences$seq = paste(sequences$CDR2.IMGT, sequences$CDR2.IMGT, sequences$FR2.IMGT, sequences$FR3.IMGT, sep="_") | 127 sequences$seq = paste(sequences$CDR2.IMGT, sequences$CDR2.IMGT, sequences$FR2.IMGT, sequences$FR3.IMGT, sep="_") |
| 113 | 128 |
| 114 NToverview = merge(NToverview, sequences[,c("Sequence.ID", "seq")], by="Sequence.ID") | 129 NToverview = merge(NToverview, sequences[,c("Sequence.ID", "seq")], by="Sequence.ID") |
| 115 | 130 |
| 131 NToverview = NToverview[NToverview$Sequence.ID %in% merged$Sequence.ID,] | |
| 132 | |
| 116 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) | 133 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) |
| 117 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) | 134 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) |
| 118 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) | 135 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) |
| 119 NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq)) | 136 NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq)) |
| 120 | 137 |
| 121 NTsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T)) | 138 #Nsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T)) |
| 122 | 139 |
| 123 print(names(NToverview)) | 140 #NToverview = rbind(NToverview, NTsum) |
| 124 print(names(NTsum)) | 141 |
| 125 | 142 NTresult = data.frame(nt=c("A", "C", "T", "G")) |
| 126 NToverview = rbind(NToverview, NTsum) | 143 |
| 144 for(clazz in gene.classes){ | |
| 145 NToverview.sub = NToverview[grepl(clazz, NToverview$best_match),] | |
| 146 new.col.x = c(sum(NToverview.sub$A), sum(NToverview.sub$C), sum(NToverview.sub$T), sum(NToverview.sub$G)) | |
| 147 new.col.y = sum(new.col.x) | |
| 148 new.col.z = round(new.col.x / new.col.y * 100, 2) | |
| 149 | |
| 150 tmp = names(NTresult) | |
| 151 NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z)) | |
| 152 names(NTresult) = c(tmp, paste(clazz, c("x", "y", "z"), sep="")) | |
| 153 } | |
| 154 | |
| 155 new.col.x = c(sum(NToverview$A), sum(NToverview$C), sum(NToverview$T), sum(NToverview$G)) | |
| 156 new.col.y = sum(new.col.x) | |
| 157 new.col.z = round(new.col.x / new.col.y * 100, 2) | |
| 158 | |
| 159 tmp = names(NTresult) | |
| 160 NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z)) | |
| 161 names(NTresult) = c(tmp, paste("all", c("x", "y", "z"), sep="")) | |
| 162 | |
| 163 names(hotspot.analysis.sum) = names(NTresult) | |
| 164 | |
| 165 hotspot.analysis.sum = rbind(hotspot.analysis.sum, NTresult) | |
| 166 | |
| 167 print(hotspot.analysis.sum) | |
| 168 | |
| 169 write.table(hotspot.analysis.sum, hotspot.analysis.sum.file, quote=F, sep=",", row.names=F, col.names=F, na="0") | |
| 170 | |
| 127 | 171 |
| 128 write.table(NToverview, NToverview.file, quote=F, sep="\t", row.names=F, col.names=T) | 172 write.table(NToverview, NToverview.file, quote=F, sep="\t", row.names=F, col.names=T) |
| 129 #write.table(NTsum, NTsum.file, quote=F, sep="\t", row.names=F, col.names=T) | 173 |
| 130 | 174 |
| 131 | 175 |
| 132 | 176 |
| 133 | 177 |
| 134 | 178 |
| 135 | 179 |
| 136 | 180 |
| 137 | 181 |
| 138 | 182 |
| 139 | 183 |
| 140 | 184 |
| 141 | 185 |
| 142 | 186 |
| 143 | 187 |
| 144 | 188 |
| 145 | 189 |
| 146 | 190 |
| 147 | 191 |
| 148 | 192 |
| 149 | 193 |
| 150 | 194 |
| 151 | 195 |
| 152 | 196 |
| 153 | 197 |
| 154 | 198 |
| 155 | 199 |
| 156 | 200 |
| 157 | 201 |
| 158 | 202 |
| 159 |
