Mercurial > repos > davidvanzessen > mutation_analysis
comparison sequence_overview.r @ 102:e6bc976760d4 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 21 Jun 2016 03:32:50 -0400 |
parents | ff5be711382b |
children | e21cbe15381f |
comparison
equal
deleted
inserted
replaced
101:3cffb8a38bb1 | 102:e6bc976760d4 |
---|---|
15 | 15 |
16 before.unique = read.table(before.unique.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") | 16 before.unique = read.table(before.unique.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") |
17 merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") | 17 merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") |
18 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") | 18 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") |
19 | 19 |
20 before.unique = before.unique[!grepl("unmatched", before.unique$best_match),] | 20 #before.unique = before.unique[!grepl("unmatched", before.unique$best_match),] |
21 | 21 |
22 before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq) | 22 before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq) |
23 | 23 |
24 IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] | 24 IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] |
25 IDs$best_match = as.character(IDs$best_match) | 25 IDs$best_match = as.character(IDs$best_match) |
44 make.link = function(id, clss, val) { paste("<a href='", clss, "_", id, ".html'>", val, "</a>", sep="") } | 44 make.link = function(id, clss, val) { paste("<a href='", clss, "_", id, ".html'>", val, "</a>", sep="") } |
45 tbl = function(df) { res = "<table border='1'>"; for(i in 1:nrow(df)){ res = paste(res, tr(df[i,]), sep=""); }; res = paste(res, "</table>"); } | 45 tbl = function(df) { res = "<table border='1'>"; for(i in 1:nrow(df)){ res = paste(res, tr(df[i,]), sep=""); }; res = paste(res, "</table>"); } |
46 | 46 |
47 cat("<table border='1'>", file=main.html, append=F) | 47 cat("<table border='1'>", file=main.html, append=F) |
48 cat("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) | 48 cat("<caption>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</caption>", file=main.html, append=T) |
49 cat("<tr><th>Sequence</th><th>Functionality</th><th>ca1</th><th>ca2</th><th>cg1</th><th>cg2</th><th>cg3</th><th>cg4</th><th>cm</th></tr>", file=main.html, append=T) | 49 cat("<tr><th>Sequence</th><th>Functionality</th><th>ca1</th><th>ca2</th><th>cg1</th><th>cg2</th><th>cg3</th><th>cg4</th><th>cm</th><th>un</th></tr>", file=main.html, append=T) |
50 | 50 |
51 | 51 |
52 | 52 |
53 single.sequences=0 #sequence only found once, skipped | 53 single.sequences=0 #sequence only found once, skipped |
54 in.multiple=0 #same sequence across multiple subclasses | 54 in.multiple=0 #same sequence across multiple subclasses |
55 multiple.in.one=0 #same sequence multiple times in one subclass | 55 multiple.in.one=0 #same sequence multiple times in one subclass |
56 unmatched=0 #all of the sequences are unmatched | |
57 some.unmatched=0 #one or more sequences in a clone are unmatched | |
56 matched=0 #should be the same als matched sequences | 58 matched=0 #should be the same als matched sequences |
59 | |
60 sequence.id.page="by_id.html" | |
57 | 61 |
58 for(i in 1:nrow(dat)){ | 62 for(i in 1:nrow(dat)){ |
59 | 63 |
60 ca1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^ca1", IDs$best_match),] | 64 ca1 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^ca1", IDs$best_match),] |
61 ca2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^ca2", IDs$best_match),] | 65 ca2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^ca2", IDs$best_match),] |
64 cg2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^cg2", IDs$best_match),] | 68 cg2 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^cg2", IDs$best_match),] |
65 cg3 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^cg3", IDs$best_match),] | 69 cg3 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^cg3", IDs$best_match),] |
66 cg4 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^cg4", IDs$best_match),] | 70 cg4 = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^cg4", IDs$best_match),] |
67 | 71 |
68 cm = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^cm", IDs$best_match),] | 72 cm = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^cm", IDs$best_match),] |
69 allc = rbind(ca1, ca2, cg1, cg2, cg3, cg4, cm) | 73 |
70 | 74 un = IDs[IDs$seq_conc == dat[i,c("seq_conc")] & grepl("^unmatched", IDs$best_match),] |
71 classes = c(nrow(ca1), nrow(ca2), nrow(cg1), nrow(cg2), nrow(cg3), nrow(cg4), nrow(cm)) | 75 allc = rbind(ca1, ca2, cg1, cg2, cg3, cg4, cm, un) |
76 | |
77 classes = c(nrow(ca1), nrow(ca2), nrow(cg1), nrow(cg2), nrow(cg3), nrow(cg4), nrow(cm), nrow(un)) | |
72 | 78 |
73 classes.sum = sum(classes) | 79 classes.sum = sum(classes) |
74 | 80 |
75 if(classes.sum == 1){ | 81 if(classes.sum == 1){ |
76 single.sequences = single.sequences + 1 | 82 single.sequences = single.sequences + 1 |
77 next | 83 next |
78 } | 84 } |
79 | 85 |
86 if(nrow(un) == classes.sum){ | |
87 unmatched = unmatched + 1 | |
88 next | |
89 } | |
90 | |
80 matched = matched + sum(classes > 0) #count in how many subclasses the sequence occurs. | 91 matched = matched + sum(classes > 0) #count in how many subclasses the sequence occurs. |
81 | 92 |
82 if(any(classes == classes.sum)){ | 93 if(any(classes == classes.sum)){ |
94 multiple.in.one = multiple.in.one + 1 | |
95 } else if (nrow(un) > 0) { | |
96 some.unmatched = some.unmatched + 1 | |
97 } else { | |
83 in.multiple = in.multiple + 1 | 98 in.multiple = in.multiple + 1 |
84 } else { | |
85 multiple.in.one = multiple.in.one + 1 | |
86 } | 99 } |
87 | 100 |
88 id = as.numeric(dat[i,"seq_conc"]) | 101 id = as.numeric(dat[i,"seq_conc"]) |
89 | 102 |
90 functionality = paste(unique(allc[,"Functionality"]), collapse=",") | 103 functionality = paste(unique(allc[,"Functionality"]), collapse=",") |
91 | 104 |
105 by.id.row = c() | |
106 | |
92 if(nrow(ca1) > 0){ | 107 if(nrow(ca1) > 0){ |
93 cat(tbl(ca1), file=paste("ca1_", id, ".html", sep="")) | 108 cat(tbl(ca1), file=paste("ca1_", id, ".html", sep="")) |
94 } | 109 } |
95 | 110 |
96 if(nrow(ca2) > 0){ | 111 if(nrow(ca2) > 0){ |
113 cat(tbl(cg4), file=paste("cg4_", id, ".html", sep="")) | 128 cat(tbl(cg4), file=paste("cg4_", id, ".html", sep="")) |
114 } | 129 } |
115 | 130 |
116 if(nrow(cm) > 0){ | 131 if(nrow(cm) > 0){ |
117 cat(tbl(cm), file=paste("cm_", id, ".html", sep="")) | 132 cat(tbl(cm), file=paste("cm_", id, ".html", sep="")) |
133 } | |
134 | |
135 if(nrow(un) > 0){ | |
136 cat(tbl(un), file=paste("un_", id, ".html", sep="")) | |
118 } | 137 } |
119 | 138 |
120 ca1.html = make.link(id, "ca1", nrow(ca1)) | 139 ca1.html = make.link(id, "ca1", nrow(ca1)) |
121 ca2.html = make.link(id, "ca2", nrow(ca2)) | 140 ca2.html = make.link(id, "ca2", nrow(ca2)) |
122 | 141 |
125 cg3.html = make.link(id, "cg3", nrow(cg3)) | 144 cg3.html = make.link(id, "cg3", nrow(cg3)) |
126 cg4.html = make.link(id, "cg4", nrow(cg4)) | 145 cg4.html = make.link(id, "cg4", nrow(cg4)) |
127 | 146 |
128 cm.html = make.link(id, "cm", nrow(cm)) | 147 cm.html = make.link(id, "cm", nrow(cm)) |
129 | 148 |
130 rw = c(as.character(dat[i,"seq_conc"]), functionality, ca1.html, ca2.html, cg1.html, cg2.html, cg3.html, cg4.html, cm.html) | 149 un.html = make.link(id, "un", nrow(un)) |
150 | |
151 #rw = c(as.character(dat[i,"seq_conc"]), functionality, ca1.html, ca2.html, cg1.html, cg2.html, cg3.html, cg4.html, cm.html, un.html) | |
152 rw = c(as.character(dat[i,"seq_conc"]), functionality, ca1.html, ca2.html, cg1.html, cg2.html, cg3.html, cg4.html, cm.html, un.html) | |
131 | 153 |
132 cat(tr(rw), file=main.html, append=T) | 154 cat(tr(rw), file=main.html, append=T) |
155 | |
156 | |
157 for(i in 1:nrow(allc)){ #generate html by id | |
158 html = make.link(id, allc[i,"best_match"], allc[i,"Sequence.ID"]) | |
159 cat(paste(html, "<br />"), file=sequence.id.page, append=T) | |
160 } | |
133 } | 161 } |
134 | 162 |
135 cat("</table>", file=main.html, append=T) | 163 cat("</table>", file=main.html, append=T) |
136 | 164 |
137 print(paste("Single sequences:", single.sequences)) | 165 print(paste("Single sequences:", single.sequences)) |
138 print(paste("Sequences in multiple subclasses:", in.multiple)) | 166 print(paste("Sequences in multiple subclasses:", in.multiple)) |
139 print(paste("Multiple sequences in one subclass:", multiple.in.one)) | 167 print(paste("Multiple sequences in one subclass:", multiple.in.one)) |
168 print(paste("Matched with unmatched:", some.unmatched)) | |
140 print(paste("Count that should match 'matched' sequences:", matched)) | 169 print(paste("Count that should match 'matched' sequences:", matched)) |
141 | 170 |
142 #ACGT overview | 171 #ACGT overview |
143 | 172 |
144 NToverview = merged | 173 NToverview = merged |