comparison sequence_overview.r @ 81:a778156dad3d draft

Uploaded
author davidvanzessen
date Tue, 17 May 2016 04:17:38 -0400
parents 0513b46178c4
children 564c4f6da203
comparison
equal deleted inserted replaced
80:a4c2ddeadec0 81:a778156dad3d
3 args <- commandArgs(trailingOnly = TRUE) 3 args <- commandArgs(trailingOnly = TRUE)
4 4
5 gene.matches = args[1] 5 gene.matches = args[1]
6 sequence.file = args[2] 6 sequence.file = args[2]
7 outputdir = args[3] 7 outputdir = args[3]
8 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/")
9 NTsum.file = paste(outputdir, "ntsum.txt", sep="/")
8 main.html = "index.html" 10 main.html = "index.html"
9 11
10 setwd(outputdir) 12 setwd(outputdir)
11 13
12 genes = read.table(gene.matches, header=T, sep="\t", fill=T) 14 genes = read.table(gene.matches, header=T, sep="\t", fill=T)
19 IDs = dat[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] 21 IDs = dat[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
20 IDs$best_match = as.character(IDs$best_match) 22 IDs$best_match = as.character(IDs$best_match)
21 23
22 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")]) 24 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")])
23 25
24 dat = data.frame(table(dat$best_match, dat$seq_conc, dat$Functionality)) 26 dat = data.frame(table(dat$seq_conc))
25 27
26 dat = dat[dat$Freq > 1,] 28 dat = dat[dat$Freq > 1,]
27 29
28 names(dat) = c("best_match", "seq_conc", "Functionality", "Freq") 30 names(dat) = c("seq_conc", "Freq")
29 31
30 dat$seq_conc = factor(dat$seq_conc) 32 dat$seq_conc = factor(dat$seq_conc)
31 33
32 dat = dat[order(nchar(as.character(dat$seq_conc))),] 34 dat = dat[order(nchar(as.character(dat$seq_conc))),]
33 35
98 100
99 cat(tr(rw), file=main.html, append=T) 101 cat(tr(rw), file=main.html, append=T)
100 } 102 }
101 103
102 cat("</table>", file=main.html, append=T) 104 cat("</table>", file=main.html, append=T)
105
106
107 #ACGT overview
108
109
110
111 NToverview = genes[,c("Sequence.ID", "best_match")]
112 sequences$seq = paste(sequences$CDR2.IMGT, sequences$CDR2.IMGT, sequences$FR2.IMGT, sequences$FR3.IMGT, sep="_")
113
114 NToverview = merge(NToverview, sequences[,c("Sequence.ID", "seq")], by="Sequence.ID")
115
116 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq))
117 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq))
118 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq))
119 NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq))
120
121 NTsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T))
122
123 print(names(NToverview))
124 print(names(NTsum))
125
126 NToverview = rbind(NToverview, NTsum)
127
128 write.table(NToverview, NToverview.file, quote=F, sep="\t", row.names=F, col.names=T)
129 #write.table(NTsum, NTsum.file, quote=F, sep="\t", row.names=F, col.names=T)
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159