comparison sequence_overview.r @ 98:5ffbf40cdd4b draft

Uploaded
author davidvanzessen
date Thu, 16 Jun 2016 05:05:47 -0400
parents e39176ccddc8
children 86206431cbb0
comparison
equal deleted inserted replaced
97:6e8dfbe164c6 98:5ffbf40cdd4b
39 td = function(val) { paste("<td>", val, "</td>", sep="") } 39 td = function(val) { paste("<td>", val, "</td>", sep="") }
40 tr = function(val) { capture.output(cat("<tr>", td(val), "</tr>", sep="")) } 40 tr = function(val) { capture.output(cat("<tr>", td(val), "</tr>", sep="")) }
41 make.link = function(id, clss, val) { paste("<a href='", clss, "_", id, ".html'>", val, "</a>", sep="") } 41 make.link = function(id, clss, val) { paste("<a href='", clss, "_", id, ".html'>", val, "</a>", sep="") }
42 tbl = function(df) { res = "<table border='1'>"; for(i in 1:nrow(df)){ res = paste(res, tr(df[i,]), sep=""); }; res = paste(res, "</table>"); } 42 tbl = function(df) { res = "<table border='1'>"; for(i in 1:nrow(df)){ res = paste(res, tr(df[i,]), sep=""); }; res = paste(res, "</table>"); }
43 43
44 print(paste("Number of unique sequences to be written to the sequence overview page", nrow(dat)))
45
44 cat("<table border='1'>", file=main.html, append=F) 46 cat("<table border='1'>", file=main.html, append=F)
45 cat("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) 47 cat("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T)
46 cat("<tr><th>Sequence</th><th>Functionality</th><th>ca1</th><th>ca2</th><th>cg1</th><th>cg2</th><th>cg3</th><th>cg4</th><th>cm</th></tr>", file=main.html, append=T) 48 cat("<tr><th>Sequence</th><th>Functionality</th><th>ca1</th><th>ca2</th><th>cg1</th><th>cg2</th><th>cg3</th><th>cg4</th><th>cm</th></tr>", file=main.html, append=T)
47 49
48 for(i in 1:nrow(dat)){ 50 for(i in 1:nrow(dat)){
51
49 ca1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^ca1", IDs$best_match),] 52 ca1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^ca1", IDs$best_match),]
50 ca2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^ca2", IDs$best_match),] 53 ca2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^ca2", IDs$best_match),]
51 54
52 cg1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^cg1", IDs$best_match),] 55 cg1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^cg1", IDs$best_match),]
53 cg2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^cg2", IDs$best_match),] 56 cg2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^cg2", IDs$best_match),]
60 classes = c(nrow(ca1), nrow(ca2), nrow(cg1), nrow(cg2), nrow(cg3), nrow(cg4), nrow(cm)) 63 classes = c(nrow(ca1), nrow(ca2), nrow(cg1), nrow(cg2), nrow(cg3), nrow(cg4), nrow(cm))
61 64
62 classes.sum = sum(classes) 65 classes.sum = sum(classes)
63 66
64 if(classes.sum == 1){ 67 if(classes.sum == 1){
68 print(paste("next", i, classes.sum))
65 next 69 next
70 } else {
71 print(i)
66 } 72 }
67 73
68 id = as.numeric(dat[i,"seq_conc"]) 74 id = as.numeric(dat[i,"seq_conc"])
69 75
70 functionality = paste(unique(allc[,"Functionality"]), collapse=",") 76 functionality = paste(unique(allc[,"Functionality"]), collapse=",")
142 tmp = names(NTresult) 148 tmp = names(NTresult)
143 NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z)) 149 NTresult = cbind(NTresult, data.frame(new.col.x, new.col.y, new.col.z))
144 names(NTresult) = c(tmp, paste(clazz, c("x", "y", "z"), sep="")) 150 names(NTresult) = c(tmp, paste(clazz, c("x", "y", "z"), sep=""))
145 } 151 }
146 152
153 write.table(NToverview[,c("Sequence.ID", "best_match", "seq", "A", "C", "G", "T")], NToverview.file, quote=F, sep="\t", row.names=F, col.names=T)
154
155 NToverview = NToverview[!grepl("unmatched", NToverview$best_match),]
156
147 new.col.x = c(sum(NToverview$A), sum(NToverview$C), sum(NToverview$T), sum(NToverview$G)) 157 new.col.x = c(sum(NToverview$A), sum(NToverview$C), sum(NToverview$T), sum(NToverview$G))
148 new.col.y = sum(new.col.x) 158 new.col.y = sum(new.col.x)
149 new.col.z = round(new.col.x / new.col.y * 100, 2) 159 new.col.z = round(new.col.x / new.col.y * 100, 2)
150 160
151 tmp = names(NTresult) 161 tmp = names(NTresult)
158 168
159 print(hotspot.analysis.sum) 169 print(hotspot.analysis.sum)
160 170
161 write.table(hotspot.analysis.sum, hotspot.analysis.sum.file, quote=F, sep=",", row.names=F, col.names=F, na="0") 171 write.table(hotspot.analysis.sum, hotspot.analysis.sum.file, quote=F, sep=",", row.names=F, col.names=F, na="0")
162 172
163 write.table(NToverview[,c("Sequence.ID", "best_match", "seq", "A", "C", "G", "T")], NToverview.file, quote=F, sep="\t", row.names=F, col.names=T) 173
164 174
165 175
166 176
167 177
168 178
169 179
170 180
171 181
172 182
173 183
174 184
175 185
176 186
177 187
178 188
179 189
180 190
181 191
182 192
183 193
184 194
185 195
186 196
187 197
188 198
189 199
190 200
191 201
192
193