Mercurial > repos > davidvanzessen > mutation_analysis
comparison sequence_overview.r @ 81:a778156dad3d draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Tue, 17 May 2016 04:17:38 -0400 |
| parents | 0513b46178c4 |
| children | 564c4f6da203 |
comparison
equal
deleted
inserted
replaced
| 80:a4c2ddeadec0 | 81:a778156dad3d |
|---|---|
| 3 args <- commandArgs(trailingOnly = TRUE) | 3 args <- commandArgs(trailingOnly = TRUE) |
| 4 | 4 |
| 5 gene.matches = args[1] | 5 gene.matches = args[1] |
| 6 sequence.file = args[2] | 6 sequence.file = args[2] |
| 7 outputdir = args[3] | 7 outputdir = args[3] |
| | 8 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") |
| | 9 NTsum.file = paste(outputdir, "ntsum.txt", sep="/") |
| 8 main.html = "index.html" | 10 main.html = "index.html" |
| 9 | 11 |
| 10 setwd(outputdir) | 12 setwd(outputdir) |
| 11 | 13 |
| 12 genes = read.table(gene.matches, header=T, sep="\t", fill=T) | 14 genes = read.table(gene.matches, header=T, sep="\t", fill=T) |
| 19 IDs = dat[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] | 21 IDs = dat[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] |
| 20 IDs$best_match = as.character(IDs$best_match) | 22 IDs$best_match = as.character(IDs$best_match) |
| 21 | 23 |
| 22 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")]) | 24 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")]) |
| 23 | 25 |
| 24 dat = data.frame(table(dat$best_match, dat$seq_conc, dat$Functionality)) | 26 dat = data.frame(table(dat$seq_conc)) |
| 25 | 27 |
| 26 dat = dat[dat$Freq > 1,] | 28 dat = dat[dat$Freq > 1,] |
| 27 | 29 |
| 28 names(dat) = c("best_match", "seq_conc", "Functionality", "Freq") | 30 names(dat) = c("seq_conc", "Freq") |
| 29 | 31 |
| 30 dat$seq_conc = factor(dat$seq_conc) | 32 dat$seq_conc = factor(dat$seq_conc) |
| 31 | 33 |
| 32 dat = dat[order(nchar(as.character(dat$seq_conc))),] | 34 dat = dat[order(nchar(as.character(dat$seq_conc))),] |
| 33 | 35 |
| 98 | 100 |
| 99 cat(tr(rw), file=main.html, append=T) | 101 cat(tr(rw), file=main.html, append=T) |
| 100 } | 102 } |
| 101 | 103 |
| 102 cat("</table>", file=main.html, append=T) | 104 cat("</table>", file=main.html, append=T) |
| | 105 |
| | 106 |
| | 107 #ACGT overview |
| | 108 |
| | 109 |
| | 110 |
| | 111 NToverview = genes[,c("Sequence.ID", "best_match")] |
| | 112 sequences$seq = paste(sequences$CDR2.IMGT, sequences$CDR2.IMGT, sequences$FR2.IMGT, sequences$FR3.IMGT, sep="_") |
| | 113 |
| | 114 NToverview = merge(NToverview, sequences[,c("Sequence.ID", "seq")], by="Sequence.ID") |
| | 115 |
| | 116 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) |
| | 117 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) |
| | 118 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) |
| | 119 NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq)) |
| | 120 |
| | 121 NTsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T)) |
| | 122 |
| | 123 print(names(NToverview)) |
| | 124 print(names(NTsum)) |
| | 125 |
| | 126 NToverview = rbind(NToverview, NTsum) |
| | 127 |
| | 128 write.table(NToverview, NToverview.file, quote=F, sep="\t", row.names=F, col.names=T) |
| | 129 #write.table(NTsum, NTsum.file, quote=F, sep="\t", row.names=F, col.names=T) |
| | 130 |
| | 131 |
| | 132 |
| | 133 |
| | 134 |
| | 135 |
| | 136 |
| | 137 |
| | 138 |
| | 139 |
| | 140 |
| | 141 |
| | 142 |
| | 143 |
| | 144 |
| | 145 |
| | 146 |
| | 147 |
| | 148 |
| | 149 |
| | 150 |
| | 151 |
| | 152 |
| | 153 |
| | 154 |
| | 155 |
| | 156 |
| | 157 |
| | 158 |
| | 159 |
