0
|
1 args <- commandArgs(trailingOnly = TRUE)
|
|
2
|
|
3 inFile = args[1]
|
|
4 outDir = args[2]
|
3
|
5 logfile = args[3]
|
|
6 min_freq = as.numeric(args[4])
|
|
7 min_cells = as.numeric(args[5])
|
29
|
8 mergeOn = args[6]
|
3
|
9
|
|
10 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
|
0
|
11
|
4
|
12 library(ggplot2)
|
|
13 library(reshape2)
|
|
14 library(data.table)
|
|
15 library(grid)
|
|
16 library(parallel)
|
0
|
17 #require(xtable)
|
3
|
18 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
|
13
|
19 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
|
34
|
20 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Related_to_leukemia_clone", "Clone_Sequence")]
|
|
21 dat$dsPerM = 0
|
9
|
22 dat = dat[!is.na(dat$Patient),]
|
13
|
23 dat$Related_to_leukemia_clone = F
|
9
|
24
|
0
|
25 setwd(outDir)
|
3
|
26 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
|
2
|
27 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
|
|
28 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
|
|
29
|
3
|
30 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
|
12
|
31
|
13
|
32 dat$Frequency = ((10^dat$Log10_Frequency)*100)
|
2
|
33
|
3
|
34 dat = dat[dat$Frequency >= min_freq,]
|
|
35
|
13
|
36 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
|
|
37
|
|
38 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
|
|
39
|
|
40 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
|
|
41 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
|
|
42 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
|
|
43
|
|
44 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
|
|
45 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
|
|
46
|
|
47 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
|
34
|
48 print(paste("rows:", nrow(dat)))
|
13
|
49 dat = merge(dat, min_cell_count, by="min_cell_paste")
|
34
|
50 print(paste("rows:", nrow(dat)))
|
13
|
51 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
|
|
52
|
3
|
53 dat = dat[dat$normalized_read_count >= min_cells,]
|
13
|
54
|
|
55 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
|
9
|
56
|
22
|
57 patients = split(dat, dat$Patient, drop=T)
|
9
|
58 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
|
6
|
59 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
|
0
|
60 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
|
|
61 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
|
|
62 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
|
|
63 Titles = factor(Titles, levels=Titles)
|
|
64 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
|
|
65
|
29
|
66 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
|
|
67
|
51
|
68 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
|
|
69 patient.merge.list.second = list()
|
56
|
70 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T)
|
|
71 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T)
|
0
|
72 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
|
28
|
73 if (!is.data.frame(x) & is.list(x)){
|
|
74 x = x[[1]]
|
|
75 }
|
34
|
76 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
|
|
77 x = data.frame(x,stringsAsFactors=F)
|
0
|
78 onShort = "reads"
|
|
79 if(on == "Frequency"){
|
|
80 onShort = "freq"
|
|
81 }
|
18
|
82 onx = paste(on, ".x", sep="")
|
|
83 ony = paste(on, ".y", sep="")
|
0
|
84 splt = split(x, x$Sample, drop=T)
|
4
|
85 type="pair"
|
2
|
86 if(length(splt) == 1){
|
3
|
87 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
|
4
|
88 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
|
|
89 type="single"
|
2
|
90 }
|
0
|
91 patient1 = splt[[1]]
|
|
92 patient2 = splt[[2]]
|
|
93
|
|
94 threshholdIndex = which(colnames(product) == "interval")
|
|
95 V_SegmentIndex = which(colnames(product) == "V_Segments")
|
|
96 J_SegmentIndex = which(colnames(product) == "J_Segments")
|
|
97 titleIndex = which(colnames(product) == "Titles")
|
|
98 sampleIndex = which(colnames(x) == "Sample")
|
|
99 patientIndex = which(colnames(x) == "Patient")
|
|
100 oneSample = paste(patient1[1,sampleIndex], sep="")
|
|
101 twoSample = paste(patient2[1,sampleIndex], sep="")
|
|
102 patient = paste(x[1,patientIndex])
|
3
|
103
|
0
|
104 switched = F
|
|
105 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
|
|
106 tmp = twoSample
|
|
107 twoSample = oneSample
|
|
108 oneSample = tmp
|
|
109 tmp = patient1
|
|
110 patient1 = patient2
|
|
111 patient2 = tmp
|
|
112 switched = T
|
|
113 }
|
|
114 if(appendtxt){
|
4
|
115 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
|
0
|
116 }
|
51
|
117 cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
|
9
|
118
|
29
|
119 if(mergeOn == "Clone_Sequence"){
|
|
120 patient1$merge = paste(patient1$Clone_Sequence)
|
|
121 patient2$merge = paste(patient2$Clone_Sequence)
|
|
122 } else {
|
|
123 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
|
|
124 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
|
|
125 }
|
9
|
126
|
30
|
127 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
|
29
|
128 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
|
30
|
129 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
|
|
130 scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
|
29
|
131 scatterplot_data$on = onShort
|
|
132
|
49
|
133 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy'
|
|
134 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
|
|
135
|
|
136 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
|
|
137
|
51
|
138 start.time = proc.time()
|
|
139 merge.list = c()
|
|
140
|
|
141 if(patient %in% names(patient.merge.list)){
|
|
142 patientMerge = patient.merge.list[[patient]]
|
|
143 merge.list[["second"]] = patient.merge.list.second[[patient]]
|
|
144 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
|
|
145
|
|
146 print(names(patient.merge.list))
|
|
147 } else {
|
|
148 #fuzzy matching here...
|
49
|
149 #merge.list = patientMerge$merge
|
51
|
150
|
49
|
151 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
|
|
152 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
|
|
153
|
|
154 patient1.fuzzy = patient1
|
|
155 patient2.fuzzy = patient2
|
|
156
|
36
|
157 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
|
|
158 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
|
51
|
159
|
48
|
160 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
|
|
161 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
|
51
|
162
|
48
|
163 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
|
|
164 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
|
51
|
165
|
49
|
166 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove?
|
|
167 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
|
51
|
168
|
49
|
169 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
|
|
170 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
|
51
|
171
|
37
|
172 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
|
|
173 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
|
49
|
174
|
|
175 merge.list = list()
|
|
176
|
|
177 merge.list[["second"]] = vector()
|
|
178
|
37
|
179 while(nrow(patient.fuzzy) > 1){
|
|
180 first.merge = patient.fuzzy[1,"merge"]
|
|
181 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
|
49
|
182 first.sample = patient.fuzzy[1,"Sample"]
|
37
|
183 merge.filter = first.merge == patient.fuzzy$merge
|
51
|
184
|
49
|
185 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
|
51
|
186
|
49
|
187 first.sample.filter = first.sample == patient.fuzzy$Sample
|
|
188 second.sample.filter = first.sample != patient.fuzzy$Sample
|
|
189
|
|
190 #first match same sample, sum to a single row, same for other sample
|
|
191 #then merge rows like 'normal'
|
51
|
192
|
48
|
193 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
|
49
|
194
|
|
195
|
|
196
|
44
|
197 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
|
49
|
198 first.match.filter = merge.filter & sequence.filter & first.sample.filter
|
|
199 second.match.filter = merge.filter & sequence.filter & second.sample.filter
|
|
200
|
|
201 first.rows = patient.fuzzy[first.match.filter,]
|
|
202 second.rows = patient.fuzzy[second.match.filter,]
|
|
203
|
50
|
204 first.rows.v = table(first.rows$V_Segment_Major_Gene)
|
|
205 first.rows.v = names(first.rows.v[which.max(first.rows.v)])
|
|
206 first.rows.j = table(first.rows$J_Segment_Major_Gene)
|
|
207 first.rows.j = names(first.rows.j[which.max(first.rows.j)])
|
|
208
|
49
|
209 first.sum = data.frame(merge = first.clone.sequence,
|
|
210 Patient = patient,
|
|
211 Receptor = first.rows[1,"Receptor"],
|
|
212 Sample = first.rows[1,"Sample"],
|
|
213 Cell_Count = first.rows[1,"Cell_Count"],
|
|
214 Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
|
|
215 Log10_Frequency = log10(sum(first.rows$Frequency)),
|
|
216 Total_Read_Count = sum(first.rows$Total_Read_Count),
|
|
217 dsPerM = sum(first.rows$dsPerM),
|
50
|
218 J_Segment_Major_Gene = first.rows.j,
|
|
219 V_Segment_Major_Gene = first.rows.v,
|
49
|
220 Clone_Sequence = first.clone.sequence,
|
|
221 CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
|
|
222 Related_to_leukemia_clone = F,
|
|
223 Frequency = sum(first.rows$Frequency),
|
|
224 locus_V = first.rows[1,"locus_V"],
|
|
225 locus_J = first.rows[1,"locus_J"],
|
|
226 min_cell_count = first.rows[1,"min_cell_count"],
|
|
227 normalized_read_count = sum(first.rows$normalized_read_count),
|
|
228 paste = first.rows[1,"paste"],
|
|
229 min_cell_paste = first.rows[1,"min_cell_paste"])
|
|
230
|
|
231 if(nrow(second.rows) > 0){
|
50
|
232 second.rows.v = table(second.rows$V_Segment_Major_Gene)
|
|
233 second.rows.v = names(second.rows.v[which.max(second.rows.v)])
|
|
234 second.rows.j = table(second.rows$J_Segment_Major_Gene)
|
|
235 second.rows.j = names(second.rows.j[which.max(second.rows.j)])
|
|
236
|
55
|
237 second.sum = data.frame(merge = first.clone.sequence,
|
49
|
238 Patient = patient,
|
|
239 Receptor = second.rows[1,"Receptor"],
|
|
240 Sample = second.rows[1,"Sample"],
|
|
241 Cell_Count = second.rows[1,"Cell_Count"],
|
|
242 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
|
|
243 Log10_Frequency = log10(sum(second.rows$Frequency)),
|
|
244 Total_Read_Count = sum(second.rows$Total_Read_Count),
|
|
245 dsPerM = sum(second.rows$dsPerM),
|
50
|
246 J_Segment_Major_Gene = second.rows.j,
|
|
247 V_Segment_Major_Gene = second.rows.v,
|
49
|
248 Clone_Sequence = first.clone.sequence,
|
|
249 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
|
|
250 Related_to_leukemia_clone = F,
|
|
251 Frequency = sum(second.rows$Frequency),
|
|
252 locus_V = second.rows[1,"locus_V"],
|
|
253 locus_J = second.rows[1,"locus_J"],
|
|
254 min_cell_count = second.rows[1,"min_cell_count"],
|
|
255 normalized_read_count = sum(second.rows$normalized_read_count),
|
|
256 paste = second.rows[1,"paste"],
|
|
257 min_cell_paste = second.rows[1,"min_cell_paste"])
|
|
258
|
|
259 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
|
|
260 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
|
|
261
|
50
|
262 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
|
|
263 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
|
49
|
264
|
56
|
265 tmp.rows = rbind(first.rows, second.rows)
|
|
266 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
|
|
267
|
|
268 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) {
|
|
269 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
|
|
270 } else {
|
|
271 second.clone.sequence = second.rows[1,"Clone_Sequence"]
|
|
272 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){
|
|
273 cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
|
|
274 } else {
|
58
|
275 #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
|
49
|
276 }
|
|
277 }
|
|
278
|
59
|
279 } else if(nrow(first.rows) > 1) {
|
|
280 if(patient1[1,"Sample"] == first.sample){
|
|
281 patient1 = patient1[!(patient1$Clone_Sequence %in% first.rows$Clone_Sequence),]
|
|
282 patient1 = rbind(patient1, first.sum)
|
|
283 } else {
|
|
284 patient2 = patient2[!(patient2$Clone_Sequence %in% first.rows$Clone_Sequence),]
|
|
285 patient2 = rbind(patient2, first.sum)
|
|
286 }
|
|
287
|
|
288 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"])
|
|
289 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
|
|
290
|
|
291 patient.fuzzy = patient.fuzzy[-first.match.filter,]
|
|
292
|
|
293 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
|
37
|
294 } else {
|
|
295 patient.fuzzy = patient.fuzzy[-1,]
|
34
|
296 }
|
|
297 }
|
51
|
298 patient.merge.list[[patient]] <<- patientMerge
|
|
299 patient.merge.list.second[[patient]] <<- merge.list[["second"]]
|
|
300 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
|
34
|
301 }
|
51
|
302
|
52
|
303 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
|
|
304 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
|
51
|
305
|
37
|
306
|
18
|
307 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
|
0
|
308 res1 = vector()
|
|
309 res2 = vector()
|
|
310 resBoth = vector()
|
|
311 read1Count = vector()
|
|
312 read2Count = vector()
|
2
|
313 locussum1 = vector()
|
|
314 locussum2 = vector()
|
9
|
315
|
0
|
316 #for(iter in 1){
|
|
317 for(iter in 1:length(product[,1])){
|
|
318 threshhold = product[iter,threshholdIndex]
|
|
319 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
|
|
320 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
|
18
|
321 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
|
29
|
322 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
|
30
|
323 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
|
|
324 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
|
14
|
325 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
|
|
326 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
|
0
|
327 res1 = append(res1, sum(one))
|
2
|
328 res2 = append(res2, sum(two))
|
0
|
329 resBoth = append(resBoth, sum(both))
|
2
|
330 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
|
|
331 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
|
0
|
332 #threshhold = 0
|
|
333 if(threshhold != 0){
|
|
334 if(sum(one) > 0){
|
15
|
335 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
336 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
|
0
|
337 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
338 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
339 }
|
|
340 if(sum(two) > 0){
|
15
|
341 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
342 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
|
0
|
343 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
344 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
345 }
|
29
|
346 } else {
|
|
347 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
|
49
|
348 #scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[[twoSample]]),]
|
|
349 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
|
29
|
350 if(nrow(scatterplot_locus_data) > 0){
|
|
351 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
|
|
352 }
|
34
|
353 in_one = (scatterplot_locus_data$merge %in% patient1$merge)
|
|
354 in_two = (scatterplot_locus_data$merge %in% patient2$merge)
|
|
355 if(any(in_two)){
|
49
|
356 scatterplot_locus_data[in_two,]$type = twoSample
|
34
|
357 }
|
49
|
358 in_both = (scatterplot_locus_data$merge %in% patientMerge$merge)
|
|
359 #merge.list.filter = (scatterplot_locus_data$merge %in% merge.list[[oneSample]])
|
|
360 #exact.matches.filter = (scatterplot_locus_data$merge %in% cs.exact.matches)
|
30
|
361 if(any(in_both)){
|
|
362 scatterplot_locus_data[in_both,]$type = "In Both"
|
|
363 }
|
29
|
364 if(type == "single"){
|
|
365 single_patients <<- rbind(single_patients, scatterplot_locus_data)
|
|
366 }
|
|
367 p = NULL
|
|
368 if(nrow(scatterplot_locus_data) != 0){
|
|
369 if(on == "normalized_read_count"){
|
30
|
370 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
49
|
371 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=10^6) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
|
29
|
372 } else {
|
49
|
373 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
|
29
|
374 }
|
|
375 p = p + geom_point(aes(colour=type), position="jitter")
|
|
376 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
|
|
377 } else {
|
|
378 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
|
|
379 }
|
|
380 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
|
|
381 print(p)
|
|
382 dev.off()
|
0
|
383 }
|
|
384 if(sum(both) > 0){
|
15
|
385 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
386 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
|
0
|
387 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
388 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
29
|
389 }
|
0
|
390 }
|
2
|
391 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
|
0
|
392 if(sum(is.na(patientResult$percentage)) > 0){
|
|
393 patientResult[is.na(patientResult$percentage),]$percentage = 0
|
|
394 }
|
|
395 colnames(patientResult)[6] = oneSample
|
|
396 colnames(patientResult)[8] = twoSample
|
|
397 colnamesBak = colnames(patientResult)
|
2
|
398 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
|
0
|
399 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
400 colnames(patientResult) = colnamesBak
|
|
401
|
|
402 patientResult$Locus = factor(patientResult$Locus, Titles)
|
|
403 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
|
|
404
|
|
405 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
|
|
406 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
|
|
407 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
408 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
|
|
409 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
|
|
410 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
411 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
|
|
412 print(plt)
|
|
413 dev.off()
|
|
414 #(t,r,b,l)
|
|
415 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
|
|
416 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
|
|
417 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
418 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
|
|
419 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
|
|
420 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
421 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
|
|
422 print(plt)
|
|
423 dev.off()
|
|
424
|
|
425 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
|
|
426 patientResult$relativeValue = patientResult$value * 10
|
|
427 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
|
|
428 plt = ggplot(patientResult)
|
|
429 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
|
|
430 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
431 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
|
|
432 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
|
|
433 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
|
|
434 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
|
|
435 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
|
|
436 print(plt)
|
|
437 dev.off()
|
|
438 }
|
|
439
|
3
|
440 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
|
|
441
|
0
|
442 interval = intervalFreq
|
|
443 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
4
|
444 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
61
|
445 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
|
0
|
446
|
3
|
447 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
|
|
448
|
0
|
449 interval = intervalReads
|
|
450 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
4
|
451 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
61
|
452 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
|
33
|
453
|
|
454 if(nrow(single_patients) > 0){
|
|
455 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
|
456 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
|
|
457 p = p + geom_point(aes(colour=type), position="jitter")
|
|
458 p = p + xlab("In one or both samples") + ylab("Reads")
|
|
459 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
|
|
460 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
|
|
461 print(p)
|
|
462 dev.off()
|
7
|
463
|
33
|
464 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
|
|
465 p = p + geom_point(aes(colour=type), position="jitter")
|
|
466 p = p + xlab("In one or both samples") + ylab("Frequency")
|
|
467 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
|
|
468 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
|
|
469 print(p)
|
|
470 dev.off()
|
|
471 } else {
|
|
472 empty <- data.frame()
|
|
473 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
|
|
474
|
|
475 png("singles_reads_scatterplot.png", width=400, height=300)
|
|
476 print(p)
|
|
477 dev.off()
|
|
478
|
|
479 png("singles_freq_scatterplot.png", width=400, height=300)
|
|
480 print(p)
|
|
481 dev.off()
|
|
482 }
|
60
|
483
|
|
484 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
|
|
485 patient.merge.list.second = list()
|
|
486
|
7
|
487 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
|
|
488 onShort = "reads"
|
|
489 if(on == "Frequency"){
|
|
490 onShort = "freq"
|
|
491 }
|
18
|
492 onx = paste(on, ".x", sep="")
|
|
493 ony = paste(on, ".y", sep="")
|
|
494 onz = paste(on, ".z", sep="")
|
7
|
495 type="triplet"
|
|
496
|
|
497 threshholdIndex = which(colnames(product) == "interval")
|
|
498 V_SegmentIndex = which(colnames(product) == "V_Segments")
|
|
499 J_SegmentIndex = which(colnames(product) == "J_Segments")
|
|
500 titleIndex = which(colnames(product) == "Titles")
|
|
501 sampleIndex = which(colnames(patient1) == "Sample")
|
|
502 patientIndex = which(colnames(patient1) == "Patient")
|
|
503 oneSample = paste(patient1[1,sampleIndex], sep="")
|
|
504 twoSample = paste(patient2[1,sampleIndex], sep="")
|
|
505 threeSample = paste(patient3[1,sampleIndex], sep="")
|
60
|
506
|
29
|
507 if(mergeOn == "Clone_Sequence"){
|
|
508 patient1$merge = paste(patient1$Clone_Sequence)
|
|
509 patient2$merge = paste(patient2$Clone_Sequence)
|
|
510 patient3$merge = paste(patient3$Clone_Sequence)
|
|
511
|
|
512 } else {
|
|
513 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
|
|
514 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
|
|
515 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
|
|
516 }
|
60
|
517
|
|
518 #patientMerge = merge(patient1, patient2, by="merge")[NULL,]
|
|
519 patient1.fuzzy = patient1
|
|
520 patient2.fuzzy = patient2
|
|
521 patient3.fuzzy = patient3
|
|
522
|
|
523 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T)
|
|
524
|
|
525 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
|
|
526 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
|
|
527 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J)
|
|
528
|
|
529 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy)
|
|
530
|
|
531 other.sample.list = list()
|
|
532 other.sample.list[[oneSample]] = c(twoSample, threeSample)
|
|
533 other.sample.list[[twoSample]] = c(oneSample, threeSample)
|
|
534 other.sample.list[[threeSample]] = c(oneSample, twoSample)
|
|
535
|
9
|
536 patientMerge = merge(patient1, patient2, by="merge")
|
|
537 patientMerge = merge(patientMerge, patient3, by="merge")
|
28
|
538 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
|
60
|
539 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
|
|
540 patientMerge = patientMerge[NULL,]
|
|
541
|
|
542 duo.merge.list = list()
|
|
543
|
20
|
544 patientMerge12 = merge(patient1, patient2, by="merge")
|
60
|
545 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
|
|
546 patientMerge12 = patientMerge12[NULL,]
|
|
547 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12
|
|
548 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12
|
|
549
|
20
|
550 patientMerge13 = merge(patient1, patient3, by="merge")
|
60
|
551 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
|
|
552 patientMerge13 = patientMerge13[NULL,]
|
|
553 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13
|
|
554 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13
|
|
555
|
20
|
556 patientMerge23 = merge(patient2, patient3, by="merge")
|
60
|
557 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
|
|
558 patientMerge23 = patientMerge23[NULL,]
|
|
559 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23
|
|
560 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23
|
|
561
|
|
562 merge.list = list()
|
|
563 merge.list[["second"]] = vector()
|
|
564
|
|
565 start.time = proc.time()
|
|
566 if(paste(label1, "123") %in% names(patient.merge.list)){
|
|
567 patientMerge = patient.merge.list[[paste(label1, "123")]]
|
|
568 patientMerge12 = patient.merge.list[[paste(label1, "12")]]
|
|
569 patientMerge13 = patient.merge.list[[paste(label1, "13")]]
|
|
570 patientMerge23 = patient.merge.list[[paste(label1, "23")]]
|
|
571
|
|
572 merge.list[["second"]] = patient.merge.list.second[[label1]]
|
|
573
|
|
574 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
|
|
575 } else {
|
|
576 while(nrow(patient.fuzzy) > 0){
|
|
577 first.merge = patient.fuzzy[1,"merge"]
|
|
578 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
|
|
579 first.sample = patient.fuzzy[1,"Sample"]
|
|
580
|
|
581 merge.filter = first.merge == patient.fuzzy$merge
|
|
582
|
|
583 second.sample = other.sample.list[[first.sample]][1]
|
|
584 third.sample = other.sample.list[[first.sample]][2]
|
|
585
|
|
586 sample.filter.1 = first.sample == patient.fuzzy$Sample
|
|
587 sample.filter.2 = second.sample == patient.fuzzy$Sample
|
|
588 sample.filter.3 = third.sample == patient.fuzzy$Sample
|
|
589
|
|
590 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
|
|
591
|
|
592 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter
|
|
593 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter
|
|
594 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter
|
|
595
|
|
596 matches.in.1 = any(match.filter.1)
|
|
597 matches.in.2 = any(match.filter.2)
|
|
598 matches.in.3 = any(match.filter.3)
|
|
599
|
|
600
|
|
601
|
|
602 rows.1 = patient.fuzzy[match.filter.1,]
|
|
603
|
|
604 sum.1 = data.frame(merge = first.clone.sequence,
|
|
605 Patient = label1,
|
|
606 Receptor = rows.1[1,"Receptor"],
|
|
607 Sample = rows.1[1,"Sample"],
|
|
608 Cell_Count = rows.1[1,"Cell_Count"],
|
|
609 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes),
|
|
610 Log10_Frequency = log10(sum(rows.1$Frequency)),
|
|
611 Total_Read_Count = sum(rows.1$Total_Read_Count),
|
|
612 dsPerM = sum(rows.1$dsPerM),
|
|
613 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"],
|
|
614 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"],
|
|
615 Clone_Sequence = first.clone.sequence,
|
|
616 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"],
|
|
617 Related_to_leukemia_clone = F,
|
|
618 Frequency = sum(rows.1$Frequency),
|
|
619 locus_V = rows.1[1,"locus_V"],
|
|
620 locus_J = rows.1[1,"locus_J"],
|
64
|
621 uniqueID = rows.1[1,"uniqueID"],
|
60
|
622 normalized_read_count = sum(rows.1$normalized_read_count))
|
|
623 sum.2 = sum.1[NULL,]
|
|
624 rows.2 = patient.fuzzy[match.filter.2,]
|
|
625 if(matches.in.2){
|
|
626 sum.2 = data.frame(merge = first.clone.sequence,
|
|
627 Patient = label1,
|
|
628 Receptor = rows.2[1,"Receptor"],
|
|
629 Sample = rows.2[1,"Sample"],
|
|
630 Cell_Count = rows.2[1,"Cell_Count"],
|
|
631 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes),
|
|
632 Log10_Frequency = log10(sum(rows.2$Frequency)),
|
|
633 Total_Read_Count = sum(rows.2$Total_Read_Count),
|
|
634 dsPerM = sum(rows.2$dsPerM),
|
|
635 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"],
|
|
636 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"],
|
|
637 Clone_Sequence = first.clone.sequence,
|
|
638 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"],
|
|
639 Related_to_leukemia_clone = F,
|
|
640 Frequency = sum(rows.2$Frequency),
|
|
641 locus_V = rows.2[1,"locus_V"],
|
|
642 locus_J = rows.2[1,"locus_J"],
|
64
|
643 uniqueID = rows.2[1,"uniqueID"],
|
60
|
644 normalized_read_count = sum(rows.2$normalized_read_count))
|
|
645 }
|
|
646
|
|
647 sum.3 = sum.1[NULL,]
|
|
648 rows.3 = patient.fuzzy[match.filter.3,]
|
|
649 if(matches.in.3){
|
|
650 sum.3 = data.frame(merge = first.clone.sequence,
|
|
651 Patient = label1,
|
|
652 Receptor = rows.3[1,"Receptor"],
|
|
653 Sample = rows.3[1,"Sample"],
|
|
654 Cell_Count = rows.3[1,"Cell_Count"],
|
|
655 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes),
|
|
656 Log10_Frequency = log10(sum(rows.3$Frequency)),
|
|
657 Total_Read_Count = sum(rows.3$Total_Read_Count),
|
|
658 dsPerM = sum(rows.3$dsPerM),
|
|
659 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"],
|
|
660 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"],
|
|
661 Clone_Sequence = first.clone.sequence,
|
|
662 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"],
|
|
663 Related_to_leukemia_clone = F,
|
|
664 Frequency = sum(rows.3$Frequency),
|
|
665 locus_V = rows.3[1,"locus_V"],
|
|
666 locus_J = rows.3[1,"locus_J"],
|
64
|
667 uniqueID = rows.3[1,"uniqueID"],
|
60
|
668 normalized_read_count = sum(rows.3$normalized_read_count))
|
|
669 }
|
|
670
|
|
671 if(matches.in.2 & matches.in.3){
|
|
672 merge.123 = merge(sum.1, sum.2, by="merge")
|
|
673 merge.123 = merge(merge.123, sum.3, by="merge")
|
|
674 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="")
|
|
675 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz])
|
|
676
|
|
677 patientMerge = rbind(patientMerge, merge.123)
|
|
678 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),]
|
|
679
|
|
680 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
|
|
681 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
|
|
682
|
|
683 } else if (matches.in.2) {
|
|
684 #other.sample1 = other.sample.list[[first.sample]][1]
|
|
685 #other.sample2 = other.sample.list[[first.sample]][2]
|
|
686
|
|
687 second.sample = sum.2[,"Sample"]
|
|
688
|
|
689 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
|
|
690
|
|
691 merge.12 = merge(sum.1, sum.2, by="merge")
|
|
692
|
|
693 current.merge.list = rbind(current.merge.list, merge.12)
|
|
694 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
|
|
695
|
|
696 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),]
|
|
697
|
|
698 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
|
|
699 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
|
|
700
|
|
701 } else if (matches.in.3) {
|
|
702
|
|
703 #other.sample1 = other.sample.list[[first.sample]][1]
|
|
704 #other.sample2 = other.sample.list[[first.sample]][2]
|
|
705
|
|
706 second.sample = sum.3[,"Sample"]
|
|
707
|
|
708 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
|
|
709
|
|
710 merge.13 = merge(sum.1, sum.3, by="merge")
|
|
711
|
|
712 current.merge.list = rbind(current.merge.list, merge.13)
|
|
713 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
|
|
714
|
|
715 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),]
|
|
716
|
|
717 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
|
|
718 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
|
|
719
|
63
|
720 } else if(nrow(rows.1) > 1){
|
|
721 patient1 = patient1[!(patient1$Clone_Sequence %in% rows.1$Clone_Sequence),]
|
64
|
722 print(names(patient1)[names(patient1) %in% sum.1])
|
|
723 print(names(patient1)[!(names(patient1) %in% sum.1)])
|
|
724 print(names(patient1))
|
|
725 print(names(sum.1))
|
|
726 print(summary(sum.1))
|
|
727 print(summary(patient1))
|
|
728 print(dim(sum.1))
|
|
729 print(dim(patient1))
|
|
730 print(head(sum.1[,names(patient1)]))
|
|
731 patient1 = rbind(patient1, sum.1[,names(patient1)])
|
63
|
732 patient.fuzzy = patient.fuzzy[-match.filter.1,]
|
60
|
733 } else {
|
|
734 patient.fuzzy = patient.fuzzy[-1,]
|
|
735 }
|
|
736
|
|
737 tmp.rows = rbind(rows.1, rows.2, rows.3)
|
|
738 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
|
|
739
|
|
740 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) {
|
|
741 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
|
|
742 } else {
|
|
743 }
|
|
744
|
|
745 }
|
|
746 patient.merge.list[[paste(label1, "123")]] = patientMerge
|
|
747
|
|
748 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]]
|
|
749 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]]
|
|
750 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]]
|
|
751
|
|
752 patient.merge.list[[paste(label1, "12")]] = patientMerge12
|
|
753 patient.merge.list[[paste(label1, "13")]] = patientMerge13
|
|
754 patient.merge.list[[paste(label1, "23")]] = patientMerge23
|
|
755
|
|
756 patient.merge.list.second[[label1]] = merge.list[["second"]]
|
|
757 }
|
|
758 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
|
|
759 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
|
|
760 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
|
|
761 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
|
20
|
762 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
|
60
|
763
|
63
|
764 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),]
|
|
765 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),]
|
|
766 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),]
|
62
|
767
|
60
|
768 if(F){
|
|
769 patientMerge = merge(patient1, patient2, by="merge")
|
|
770 patientMerge = merge(patientMerge, patient3, by="merge")
|
|
771 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
|
|
772 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
|
|
773 patientMerge12 = merge(patient1, patient2, by="merge")
|
|
774 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
|
|
775 patientMerge13 = merge(patient1, patient3, by="merge")
|
|
776 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
|
|
777 patientMerge23 = merge(patient2, patient3, by="merge")
|
|
778 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
|
|
779 }
|
28
|
780
|
30
|
781 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
|
20
|
782 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
|
30
|
783 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
|
27
|
784 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
|
20
|
785
|
7
|
786 res1 = vector()
|
|
787 res2 = vector()
|
|
788 res3 = vector()
|
20
|
789 res12 = vector()
|
|
790 res13 = vector()
|
|
791 res23 = vector()
|
7
|
792 resAll = vector()
|
|
793 read1Count = vector()
|
|
794 read2Count = vector()
|
|
795 read3Count = vector()
|
|
796
|
|
797 if(appendTriplets){
|
9
|
798 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
|
7
|
799 }
|
|
800 for(iter in 1:length(product[,1])){
|
|
801 threshhold = product[iter,threshholdIndex]
|
|
802 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
|
|
803 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
|
18
|
804 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
|
|
805 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
|
7
|
806
|
30
|
807 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
|
|
808 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
|
|
809 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
|
24
|
810
|
30
|
811 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
|
|
812 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
|
|
813 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
|
20
|
814
|
18
|
815 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
|
|
816 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
|
|
817 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
|
7
|
818 res1 = append(res1, sum(one))
|
|
819 res2 = append(res2, sum(two))
|
|
820 res3 = append(res3, sum(three))
|
|
821 resAll = append(resAll, sum(all))
|
20
|
822 res12 = append(res12, sum(one_two))
|
|
823 res13 = append(res13, sum(one_three))
|
|
824 res23 = append(res23, sum(two_three))
|
7
|
825 #threshhold = 0
|
|
826 if(threshhold != 0){
|
|
827 if(sum(one) > 0){
|
20
|
828 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
829 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
830 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
831 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
832 }
|
|
833 if(sum(two) > 0){
|
20
|
834 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
835 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
836 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
837 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
838 }
|
|
839 if(sum(three) > 0){
|
20
|
840 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
841 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
842 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
843 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
844 }
|
20
|
845 if(sum(one_two) > 0){
|
|
846 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
847 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
|
|
848 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
849 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
850 }
|
|
851 if(sum(one_three) > 0){
|
|
852 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
853 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
|
854 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
855 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
856 }
|
|
857 if(sum(two_three) > 0){
|
|
858 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
859 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
|
860 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
861 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
862 }
|
|
863 } else { #scatterplot data
|
24
|
864 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
|
60
|
865 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
|
30
|
866 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
|
27
|
867 if(sum(in_two) > 0){
|
|
868 scatterplot_locus_data[in_two,]$type = "In two"
|
|
869 }
|
30
|
870 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
|
27
|
871 if(sum(in_three)> 0){
|
|
872 scatterplot_locus_data[in_three,]$type = "In three"
|
|
873 }
|
|
874 not_in_one = scatterplot_locus_data$type != "In one"
|
|
875 if(sum(not_in_one) > 0){
|
|
876 scatterplot_locus_data[not_in_one,]$type = "In multiple"
|
|
877 }
|
20
|
878 p = NULL
|
|
879 if(nrow(scatterplot_locus_data) != 0){
|
|
880 if(on == "normalized_read_count"){
|
31
|
881 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
32
|
882 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
|
20
|
883 } else {
|
32
|
884 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
|
20
|
885 }
|
|
886 p = p + geom_point(aes(colour=type), position="jitter")
|
25
|
887 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
|
20
|
888 } else {
|
|
889 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
|
|
890 }
|
|
891 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
|
|
892 print(p)
|
|
893 dev.off()
|
|
894 }
|
7
|
895 if(sum(all) > 0){
|
20
|
896 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
|
|
897 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
7
|
898 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
899 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
900 }
|
|
901 }
|
20
|
902 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
|
|
903 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
|
7
|
904 colnames(patientResult)[6] = oneSample
|
20
|
905 colnames(patientResult)[7] = twoSample
|
|
906 colnames(patientResult)[8] = threeSample
|
|
907 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
|
|
908 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
|
|
909 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
|
7
|
910
|
|
911 colnamesBak = colnames(patientResult)
|
20
|
912 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
|
7
|
913 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
914 colnames(patientResult) = colnamesBak
|
|
915
|
|
916 patientResult$Locus = factor(patientResult$Locus, Titles)
|
|
917 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
|
|
918
|
|
919 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
|
|
920 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
|
|
921 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
922 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
|
|
923 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
|
|
924 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
925 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
|
|
926 print(plt)
|
|
927 dev.off()
|
|
928
|
|
929 fontSize = 4
|
|
930
|
|
931 bak = patientResult
|
|
932 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
|
|
933 patientResult$relativeValue = patientResult$value * 10
|
|
934 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
|
|
935 plt = ggplot(patientResult)
|
|
936 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
|
|
937 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
938 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
|
|
939 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
|
|
940 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
|
|
941 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
|
|
942 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
|
|
943 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
|
|
944 print(plt)
|
|
945 dev.off()
|
|
946 }
|
|
947
|
33
|
948 if(nrow(triplets) != 0){
|
60
|
949
|
|
950 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T)
|
|
951
|
33
|
952 triplets$uniqueID = "ID"
|
|
953
|
|
954 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
955 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
956 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
957
|
|
958 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
959 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
960 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
961
|
|
962 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
|
60
|
963
|
|
964 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
|
|
965
|
33
|
966 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
|
|
967 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
|
|
968 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
|
|
969
|
|
970 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
|
|
971 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
|
|
972
|
|
973 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
|
|
974
|
|
975 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
|
|
976
|
|
977 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
|
|
978
|
|
979 triplets = triplets[triplets$normalized_read_count >= min_cells,]
|
|
980
|
60
|
981 column_drops = c("min_cell_count", "min_cell_paste")
|
33
|
982
|
|
983 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
|
60
|
984
|
|
985 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
|
|
986
|
33
|
987 interval = intervalReads
|
|
988 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
|
989 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
990
|
|
991 one = triplets[triplets$Sample == "14696_reg_BM",]
|
|
992 two = triplets[triplets$Sample == "24536_reg_BM",]
|
|
993 three = triplets[triplets$Sample == "24062_reg_BM",]
|
61
|
994 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T)
|
33
|
995
|
|
996 one = triplets[triplets$Sample == "16278_Left",]
|
|
997 two = triplets[triplets$Sample == "26402_Left",]
|
|
998 three = triplets[triplets$Sample == "26759_Left",]
|
61
|
999 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T)
|
33
|
1000
|
|
1001 one = triplets[triplets$Sample == "16278_Right",]
|
|
1002 two = triplets[triplets$Sample == "26402_Right",]
|
|
1003 three = triplets[triplets$Sample == "26759_Right",]
|
53
|
1004 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T)
|
33
|
1005
|
60
|
1006 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
|
|
1007
|
33
|
1008 interval = intervalFreq
|
|
1009 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
|
1010 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
1011
|
|
1012 one = triplets[triplets$Sample == "14696_reg_BM",]
|
|
1013 two = triplets[triplets$Sample == "24536_reg_BM",]
|
|
1014 three = triplets[triplets$Sample == "24062_reg_BM",]
|
61
|
1015 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F)
|
33
|
1016
|
|
1017 one = triplets[triplets$Sample == "16278_Left",]
|
|
1018 two = triplets[triplets$Sample == "26402_Left",]
|
|
1019 three = triplets[triplets$Sample == "26759_Left",]
|
61
|
1020 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F)
|
33
|
1021
|
|
1022 one = triplets[triplets$Sample == "16278_Right",]
|
|
1023 two = triplets[triplets$Sample == "26402_Right",]
|
|
1024 three = triplets[triplets$Sample == "26759_Right",]
|
53
|
1025 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F)
|
33
|
1026 } else {
|
|
1027 cat("", file="triplets.txt")
|
|
1028 }
|
60
|
1029 cat("</table></html>", file=logfile, append=T) |