Mercurial > repos > davidvanzessen > mutation_analysis
comparison sequence_overview.r @ 99:86206431cbb0 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 16 Jun 2016 10:01:54 -0400 |
parents | 5ffbf40cdd4b |
children | ff5be711382b |
comparison
equal
deleted
inserted
replaced
98:5ffbf40cdd4b | 99:86206431cbb0 |
---|---|
1 library(reshape2) | 1 library(reshape2) |
2 | 2 |
3 args <- commandArgs(trailingOnly = TRUE) | 3 args <- commandArgs(trailingOnly = TRUE) |
4 | 4 |
5 input.file = args[1] | 5 input.file = args[1] |
6 input.file = args[2] | 6 outputdir = args[2] |
7 outputdir = args[3] | 7 gene.classes = unlist(strsplit(args[3], ",")) |
8 gene.classes = unlist(strsplit(args[4], ",")) | 8 hotspot.analysis.sum.file = args[4] |
9 hotspot.analysis.sum.file = args[5] | |
10 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") | 9 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") |
11 NTsum.file = paste(outputdir, "ntsum.txt", sep="/") | 10 NTsum.file = paste(outputdir, "ntsum.txt", sep="/") |
12 main.html = "index.html" | 11 main.html = "index.html" |
13 | 12 |
14 setwd(outputdir) | 13 setwd(outputdir) |
39 td = function(val) { paste("<td>", val, "</td>", sep="") } | 38 td = function(val) { paste("<td>", val, "</td>", sep="") } |
40 tr = function(val) { capture.output(cat("<tr>", td(val), "</tr>", sep="")) } | 39 tr = function(val) { capture.output(cat("<tr>", td(val), "</tr>", sep="")) } |
41 make.link = function(id, clss, val) { paste("<a href='", clss, "_", id, ".html'>", val, "</a>", sep="") } | 40 make.link = function(id, clss, val) { paste("<a href='", clss, "_", id, ".html'>", val, "</a>", sep="") } |
42 tbl = function(df) { res = "<table border='1'>"; for(i in 1:nrow(df)){ res = paste(res, tr(df[i,]), sep=""); }; res = paste(res, "</table>"); } | 41 tbl = function(df) { res = "<table border='1'>"; for(i in 1:nrow(df)){ res = paste(res, tr(df[i,]), sep=""); }; res = paste(res, "</table>"); } |
43 | 42 |
44 print(paste("Number of unique sequences to be written to the sequence overview page", nrow(dat))) | |
45 | |
46 cat("<table border='1'>", file=main.html, append=F) | 43 cat("<table border='1'>", file=main.html, append=F) |
47 cat("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) | 44 cat("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) |
48 cat("<tr><th>Sequence</th><th>Functionality</th><th>ca1</th><th>ca2</th><th>cg1</th><th>cg2</th><th>cg3</th><th>cg4</th><th>cm</th></tr>", file=main.html, append=T) | 45 cat("<tr><th>Sequence</th><th>Functionality</th><th>ca1</th><th>ca2</th><th>cg1</th><th>cg2</th><th>cg3</th><th>cg4</th><th>cm</th></tr>", file=main.html, append=T) |
46 | |
47 | |
48 | |
49 single.sequences=0 #sequence only found once, skipped | |
50 in.multiple=0 #same sequence across multiple subclasses | |
51 multiple.in.one=0 #same sequence multiple times in one subclass | |
52 matched=0 #should be the same als matched sequences | |
49 | 53 |
50 for(i in 1:nrow(dat)){ | 54 for(i in 1:nrow(dat)){ |
51 | 55 |
52 ca1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^ca1", IDs$best_match),] | 56 ca1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^ca1", IDs$best_match),] |
53 ca2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^ca2", IDs$best_match),] | 57 ca2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^ca2", IDs$best_match),] |
63 classes = c(nrow(ca1), nrow(ca2), nrow(cg1), nrow(cg2), nrow(cg3), nrow(cg4), nrow(cm)) | 67 classes = c(nrow(ca1), nrow(ca2), nrow(cg1), nrow(cg2), nrow(cg3), nrow(cg4), nrow(cm)) |
64 | 68 |
65 classes.sum = sum(classes) | 69 classes.sum = sum(classes) |
66 | 70 |
67 if(classes.sum == 1){ | 71 if(classes.sum == 1){ |
68 print(paste("next", i, classes.sum)) | 72 single.sequences = single.sequences + 1 |
69 next | 73 next |
74 } | |
75 | |
76 matched = matched + sum(classes > 0) #count in how many subclasses the sequence occurs. | |
77 | |
78 if(any(classes == classes.sum)){ | |
79 in.multiple = in.multiple + 1 | |
70 } else { | 80 } else { |
71 print(i) | 81 multiple.in.one = multiple.in.one + 1 |
72 } | 82 } |
73 | 83 |
74 id = as.numeric(dat[i,"seq_conc"]) | 84 id = as.numeric(dat[i,"seq_conc"]) |
75 | 85 |
76 functionality = paste(unique(allc[,"Functionality"]), collapse=",") | 86 functionality = paste(unique(allc[,"Functionality"]), collapse=",") |
118 cat(tr(rw), file=main.html, append=T) | 128 cat(tr(rw), file=main.html, append=T) |
119 } | 129 } |
120 | 130 |
121 cat("</table>", file=main.html, append=T) | 131 cat("</table>", file=main.html, append=T) |
122 | 132 |
133 print(paste("Single sequences:", single.sequences)) | |
134 print(paste("Sequences in multiple subclasses:", in.multiple)) | |
135 print(paste("Multiple sequences in one subclass:", multiple.in.one)) | |
136 print(paste("Count that should match 'matched' sequences:", matched)) | |
123 | 137 |
124 #ACGT overview | 138 #ACGT overview |
125 | 139 |
126 NToverview = merged | 140 NToverview = merged |
127 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq, sep="_") | 141 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq, sep="_") |
128 | 142 |
129 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) | 143 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) |
130 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) | 144 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) |
131 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) | 145 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) |
132 NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq)) | 146 NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq)) |
133 | |
134 print(sum(colSums(NToverview[,c("A", "C", "T", "G")]))) | |
135 | 147 |
136 #Nsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T)) | 148 #Nsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T)) |
137 | 149 |
138 #NToverview = rbind(NToverview, NTsum) | 150 #NToverview = rbind(NToverview, NTsum) |
139 | 151 |
164 | 176 |
165 names(hotspot.analysis.sum) = names(NTresult) | 177 names(hotspot.analysis.sum) = names(NTresult) |
166 | 178 |
167 hotspot.analysis.sum = rbind(hotspot.analysis.sum, NTresult) | 179 hotspot.analysis.sum = rbind(hotspot.analysis.sum, NTresult) |
168 | 180 |
169 print(hotspot.analysis.sum) | |
170 | |
171 write.table(hotspot.analysis.sum, hotspot.analysis.sum.file, quote=F, sep=",", row.names=F, col.names=F, na="0") | 181 write.table(hotspot.analysis.sum, hotspot.analysis.sum.file, quote=F, sep=",", row.names=F, col.names=F, na="0") |
172 | 182 |
173 | 183 |
174 | 184 |
175 | 185 |