0
|
1 args <- commandArgs(trailingOnly = TRUE)
|
|
2
|
|
3 inFile = args[1]
|
|
4 outDir = args[2]
|
3
|
5 logfile = args[3]
|
|
6 min_freq = as.numeric(args[4])
|
|
7 min_cells = as.numeric(args[5])
|
29
|
8 mergeOn = args[6]
|
3
|
9
|
|
10 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
|
0
|
11
|
4
|
12 library(ggplot2)
|
|
13 library(reshape2)
|
|
14 library(data.table)
|
|
15 library(grid)
|
|
16 library(parallel)
|
0
|
17 #require(xtable)
|
3
|
18 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
|
13
|
19 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
|
34
|
20 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Related_to_leukemia_clone", "Clone_Sequence")]
|
|
21 dat$dsPerM = 0
|
9
|
22 dat = dat[!is.na(dat$Patient),]
|
13
|
23 dat$Related_to_leukemia_clone = F
|
9
|
24
|
0
|
25 setwd(outDir)
|
3
|
26 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
|
2
|
27 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
|
|
28 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
|
|
29
|
3
|
30 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
|
12
|
31
|
13
|
32 dat$Frequency = ((10^dat$Log10_Frequency)*100)
|
2
|
33
|
3
|
34 dat = dat[dat$Frequency >= min_freq,]
|
|
35
|
13
|
36 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
|
|
37
|
|
38 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
|
|
39
|
|
40 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
|
|
41 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
|
|
42 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
|
|
43
|
|
44 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
|
|
45 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
|
|
46
|
|
47 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
|
34
|
48 print(paste("rows:", nrow(dat)))
|
13
|
49 dat = merge(dat, min_cell_count, by="min_cell_paste")
|
34
|
50 print(paste("rows:", nrow(dat)))
|
13
|
51 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
|
|
52
|
3
|
53 dat = dat[dat$normalized_read_count >= min_cells,]
|
13
|
54
|
|
55 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
|
9
|
56
|
22
|
57 patients = split(dat, dat$Patient, drop=T)
|
9
|
58 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
|
6
|
59 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
|
0
|
60 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
|
|
61 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
|
|
62 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
|
|
63 Titles = factor(Titles, levels=Titles)
|
|
64 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
|
|
65
|
29
|
66 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
|
|
67
|
51
|
68 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
|
|
69 patient.merge.list.second = list()
|
56
|
70 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T)
|
|
71 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T)
|
0
|
72 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
|
28
|
73 if (!is.data.frame(x) & is.list(x)){
|
|
74 x = x[[1]]
|
|
75 }
|
34
|
76 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
|
|
77 x = data.frame(x,stringsAsFactors=F)
|
0
|
78 onShort = "reads"
|
|
79 if(on == "Frequency"){
|
|
80 onShort = "freq"
|
|
81 }
|
18
|
82 onx = paste(on, ".x", sep="")
|
|
83 ony = paste(on, ".y", sep="")
|
0
|
84 splt = split(x, x$Sample, drop=T)
|
4
|
85 type="pair"
|
2
|
86 if(length(splt) == 1){
|
3
|
87 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
|
4
|
88 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
|
|
89 type="single"
|
2
|
90 }
|
0
|
91 patient1 = splt[[1]]
|
|
92 patient2 = splt[[2]]
|
|
93
|
|
94 threshholdIndex = which(colnames(product) == "interval")
|
|
95 V_SegmentIndex = which(colnames(product) == "V_Segments")
|
|
96 J_SegmentIndex = which(colnames(product) == "J_Segments")
|
|
97 titleIndex = which(colnames(product) == "Titles")
|
|
98 sampleIndex = which(colnames(x) == "Sample")
|
|
99 patientIndex = which(colnames(x) == "Patient")
|
|
100 oneSample = paste(patient1[1,sampleIndex], sep="")
|
|
101 twoSample = paste(patient2[1,sampleIndex], sep="")
|
|
102 patient = paste(x[1,patientIndex])
|
3
|
103
|
0
|
104 switched = F
|
|
105 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
|
|
106 tmp = twoSample
|
|
107 twoSample = oneSample
|
|
108 oneSample = tmp
|
|
109 tmp = patient1
|
|
110 patient1 = patient2
|
|
111 patient2 = tmp
|
|
112 switched = T
|
|
113 }
|
|
114 if(appendtxt){
|
4
|
115 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
|
0
|
116 }
|
51
|
117 cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
|
9
|
118
|
29
|
119 if(mergeOn == "Clone_Sequence"){
|
|
120 patient1$merge = paste(patient1$Clone_Sequence)
|
|
121 patient2$merge = paste(patient2$Clone_Sequence)
|
|
122 } else {
|
|
123 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
|
|
124 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
|
|
125 }
|
9
|
126
|
30
|
127 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
|
29
|
128 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
|
30
|
129 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
|
|
130 scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
|
29
|
131 scatterplot_data$on = onShort
|
|
132
|
49
|
133 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy'
|
|
134 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
|
|
135
|
|
136 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
|
|
137
|
51
|
138 start.time = proc.time()
|
|
139 merge.list = c()
|
|
140
|
|
141 if(patient %in% names(patient.merge.list)){
|
|
142 patientMerge = patient.merge.list[[patient]]
|
|
143 merge.list[["second"]] = patient.merge.list.second[[patient]]
|
|
144 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
|
|
145
|
|
146 print(names(patient.merge.list))
|
|
147 } else {
|
|
148 #fuzzy matching here...
|
49
|
149 #merge.list = patientMerge$merge
|
51
|
150
|
49
|
151 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
|
|
152 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
|
|
153
|
|
154 patient1.fuzzy = patient1
|
|
155 patient2.fuzzy = patient2
|
|
156
|
36
|
157 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
|
|
158 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
|
51
|
159
|
48
|
160 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
|
|
161 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
|
51
|
162
|
48
|
163 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
|
|
164 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
|
51
|
165
|
49
|
166 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove?
|
|
167 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
|
51
|
168
|
49
|
169 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
|
|
170 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
|
51
|
171
|
37
|
172 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
|
|
173 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
|
49
|
174
|
|
175 merge.list = list()
|
|
176
|
|
177 merge.list[["second"]] = vector()
|
|
178
|
37
|
179 while(nrow(patient.fuzzy) > 1){
|
|
180 first.merge = patient.fuzzy[1,"merge"]
|
|
181 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
|
49
|
182 first.sample = patient.fuzzy[1,"Sample"]
|
37
|
183 merge.filter = first.merge == patient.fuzzy$merge
|
51
|
184
|
49
|
185 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
|
51
|
186
|
49
|
187 first.sample.filter = first.sample == patient.fuzzy$Sample
|
|
188 second.sample.filter = first.sample != patient.fuzzy$Sample
|
|
189
|
|
190 #first match same sample, sum to a single row, same for other sample
|
|
191 #then merge rows like 'normal'
|
51
|
192
|
48
|
193 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
|
49
|
194
|
|
195
|
|
196
|
44
|
197 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
|
49
|
198 first.match.filter = merge.filter & sequence.filter & first.sample.filter
|
|
199 second.match.filter = merge.filter & sequence.filter & second.sample.filter
|
|
200
|
|
201 first.rows = patient.fuzzy[first.match.filter,]
|
|
202 second.rows = patient.fuzzy[second.match.filter,]
|
|
203
|
50
|
204 first.rows.v = table(first.rows$V_Segment_Major_Gene)
|
|
205 first.rows.v = names(first.rows.v[which.max(first.rows.v)])
|
|
206 first.rows.j = table(first.rows$J_Segment_Major_Gene)
|
|
207 first.rows.j = names(first.rows.j[which.max(first.rows.j)])
|
|
208
|
49
|
209 first.sum = data.frame(merge = first.clone.sequence,
|
|
210 Patient = patient,
|
|
211 Receptor = first.rows[1,"Receptor"],
|
|
212 Sample = first.rows[1,"Sample"],
|
|
213 Cell_Count = first.rows[1,"Cell_Count"],
|
|
214 Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
|
|
215 Log10_Frequency = log10(sum(first.rows$Frequency)),
|
|
216 Total_Read_Count = sum(first.rows$Total_Read_Count),
|
|
217 dsPerM = sum(first.rows$dsPerM),
|
50
|
218 J_Segment_Major_Gene = first.rows.j,
|
|
219 V_Segment_Major_Gene = first.rows.v,
|
49
|
220 Clone_Sequence = first.clone.sequence,
|
|
221 CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
|
|
222 Related_to_leukemia_clone = F,
|
|
223 Frequency = sum(first.rows$Frequency),
|
|
224 locus_V = first.rows[1,"locus_V"],
|
|
225 locus_J = first.rows[1,"locus_J"],
|
|
226 min_cell_count = first.rows[1,"min_cell_count"],
|
|
227 normalized_read_count = sum(first.rows$normalized_read_count),
|
|
228 paste = first.rows[1,"paste"],
|
|
229 min_cell_paste = first.rows[1,"min_cell_paste"])
|
|
230
|
|
231 if(nrow(second.rows) > 0){
|
50
|
232 second.rows.v = table(second.rows$V_Segment_Major_Gene)
|
|
233 second.rows.v = names(second.rows.v[which.max(second.rows.v)])
|
|
234 second.rows.j = table(second.rows$J_Segment_Major_Gene)
|
|
235 second.rows.j = names(second.rows.j[which.max(second.rows.j)])
|
|
236
|
55
|
237 second.sum = data.frame(merge = first.clone.sequence,
|
49
|
238 Patient = patient,
|
|
239 Receptor = second.rows[1,"Receptor"],
|
|
240 Sample = second.rows[1,"Sample"],
|
|
241 Cell_Count = second.rows[1,"Cell_Count"],
|
|
242 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
|
|
243 Log10_Frequency = log10(sum(second.rows$Frequency)),
|
|
244 Total_Read_Count = sum(second.rows$Total_Read_Count),
|
|
245 dsPerM = sum(second.rows$dsPerM),
|
50
|
246 J_Segment_Major_Gene = second.rows.j,
|
|
247 V_Segment_Major_Gene = second.rows.v,
|
49
|
248 Clone_Sequence = first.clone.sequence,
|
|
249 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
|
|
250 Related_to_leukemia_clone = F,
|
|
251 Frequency = sum(second.rows$Frequency),
|
|
252 locus_V = second.rows[1,"locus_V"],
|
|
253 locus_J = second.rows[1,"locus_J"],
|
|
254 min_cell_count = second.rows[1,"min_cell_count"],
|
|
255 normalized_read_count = sum(second.rows$normalized_read_count),
|
|
256 paste = second.rows[1,"paste"],
|
|
257 min_cell_paste = second.rows[1,"min_cell_paste"])
|
|
258
|
|
259 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
|
|
260 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
|
|
261
|
50
|
262 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
|
|
263 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
|
49
|
264
|
56
|
265 tmp.rows = rbind(first.rows, second.rows)
|
|
266 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
|
|
267
|
|
268 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) {
|
|
269 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
|
|
270 } else {
|
|
271 second.clone.sequence = second.rows[1,"Clone_Sequence"]
|
|
272 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){
|
|
273 cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
|
|
274 } else {
|
58
|
275 #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
|
49
|
276 }
|
|
277 }
|
|
278
|
59
|
279 } else if(nrow(first.rows) > 1) {
|
|
280 if(patient1[1,"Sample"] == first.sample){
|
|
281 patient1 = patient1[!(patient1$Clone_Sequence %in% first.rows$Clone_Sequence),]
|
|
282 patient1 = rbind(patient1, first.sum)
|
|
283 } else {
|
|
284 patient2 = patient2[!(patient2$Clone_Sequence %in% first.rows$Clone_Sequence),]
|
|
285 patient2 = rbind(patient2, first.sum)
|
|
286 }
|
|
287
|
|
288 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"])
|
|
289 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
|
|
290
|
|
291 patient.fuzzy = patient.fuzzy[-first.match.filter,]
|
|
292
|
|
293 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
|
37
|
294 } else {
|
|
295 patient.fuzzy = patient.fuzzy[-1,]
|
34
|
296 }
|
|
297 }
|
51
|
298 patient.merge.list[[patient]] <<- patientMerge
|
|
299 patient.merge.list.second[[patient]] <<- merge.list[["second"]]
|
|
300 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
|
34
|
301 }
|
51
|
302
|
52
|
303 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
|
|
304 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
|
51
|
305
|
37
|
306
|
18
|
307 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
|
0
|
308 res1 = vector()
|
|
309 res2 = vector()
|
|
310 resBoth = vector()
|
|
311 read1Count = vector()
|
|
312 read2Count = vector()
|
2
|
313 locussum1 = vector()
|
|
314 locussum2 = vector()
|
9
|
315
|
0
|
316 #for(iter in 1){
|
|
317 for(iter in 1:length(product[,1])){
|
|
318 threshhold = product[iter,threshholdIndex]
|
|
319 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
|
|
320 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
|
18
|
321 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
|
29
|
322 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
|
30
|
323 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
|
|
324 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
|
14
|
325 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
|
|
326 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
|
0
|
327 res1 = append(res1, sum(one))
|
2
|
328 res2 = append(res2, sum(two))
|
0
|
329 resBoth = append(resBoth, sum(both))
|
2
|
330 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
|
|
331 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
|
0
|
332 #threshhold = 0
|
|
333 if(threshhold != 0){
|
|
334 if(sum(one) > 0){
|
15
|
335 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
336 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
|
0
|
337 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
338 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
339 }
|
|
340 if(sum(two) > 0){
|
15
|
341 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
342 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
|
0
|
343 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
344 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
345 }
|
29
|
346 } else {
|
|
347 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
|
49
|
348 #scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[[twoSample]]),]
|
|
349 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
|
29
|
350 if(nrow(scatterplot_locus_data) > 0){
|
|
351 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
|
|
352 }
|
34
|
353 in_one = (scatterplot_locus_data$merge %in% patient1$merge)
|
|
354 in_two = (scatterplot_locus_data$merge %in% patient2$merge)
|
|
355 if(any(in_two)){
|
49
|
356 scatterplot_locus_data[in_two,]$type = twoSample
|
34
|
357 }
|
49
|
358 in_both = (scatterplot_locus_data$merge %in% patientMerge$merge)
|
|
359 #merge.list.filter = (scatterplot_locus_data$merge %in% merge.list[[oneSample]])
|
|
360 #exact.matches.filter = (scatterplot_locus_data$merge %in% cs.exact.matches)
|
30
|
361 if(any(in_both)){
|
|
362 scatterplot_locus_data[in_both,]$type = "In Both"
|
|
363 }
|
29
|
364 if(type == "single"){
|
|
365 single_patients <<- rbind(single_patients, scatterplot_locus_data)
|
|
366 }
|
|
367 p = NULL
|
|
368 if(nrow(scatterplot_locus_data) != 0){
|
|
369 if(on == "normalized_read_count"){
|
30
|
370 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
49
|
371 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=10^6) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
|
29
|
372 } else {
|
49
|
373 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
|
29
|
374 }
|
|
375 p = p + geom_point(aes(colour=type), position="jitter")
|
|
376 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
|
|
377 } else {
|
|
378 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
|
|
379 }
|
|
380 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
|
|
381 print(p)
|
|
382 dev.off()
|
0
|
383 }
|
|
384 if(sum(both) > 0){
|
15
|
385 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
386 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
|
0
|
387 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
388 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
29
|
389 }
|
0
|
390 }
|
2
|
391 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
|
0
|
392 if(sum(is.na(patientResult$percentage)) > 0){
|
|
393 patientResult[is.na(patientResult$percentage),]$percentage = 0
|
|
394 }
|
|
395 colnames(patientResult)[6] = oneSample
|
|
396 colnames(patientResult)[8] = twoSample
|
|
397 colnamesBak = colnames(patientResult)
|
2
|
398 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
|
0
|
399 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
400 colnames(patientResult) = colnamesBak
|
|
401
|
|
402 patientResult$Locus = factor(patientResult$Locus, Titles)
|
|
403 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
|
|
404
|
|
405 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
|
|
406 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
|
|
407 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
408 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
|
|
409 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
|
|
410 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
411 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
|
|
412 print(plt)
|
|
413 dev.off()
|
|
414 #(t,r,b,l)
|
|
415 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
|
|
416 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
|
|
417 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
418 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
|
|
419 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
|
|
420 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
421 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
|
|
422 print(plt)
|
|
423 dev.off()
|
|
424
|
|
425 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
|
|
426 patientResult$relativeValue = patientResult$value * 10
|
|
427 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
|
|
428 plt = ggplot(patientResult)
|
|
429 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
|
|
430 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
431 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
|
|
432 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
|
|
433 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
|
|
434 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
|
|
435 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
|
|
436 print(plt)
|
|
437 dev.off()
|
|
438 }
|
|
439
|
3
|
440 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
|
|
441
|
0
|
442 interval = intervalFreq
|
|
443 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
4
|
444 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
60
|
445 lapply(patients[4], FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
|
0
|
446
|
3
|
447 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
|
|
448
|
0
|
449 interval = intervalReads
|
|
450 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
4
|
451 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
60
|
452 lapply(patients[4], FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
|
33
|
453
|
|
454 if(nrow(single_patients) > 0){
|
|
455 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
|
456 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
|
|
457 p = p + geom_point(aes(colour=type), position="jitter")
|
|
458 p = p + xlab("In one or both samples") + ylab("Reads")
|
|
459 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
|
|
460 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
|
|
461 print(p)
|
|
462 dev.off()
|
7
|
463
|
33
|
464 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
|
|
465 p = p + geom_point(aes(colour=type), position="jitter")
|
|
466 p = p + xlab("In one or both samples") + ylab("Frequency")
|
|
467 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
|
|
468 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
|
|
469 print(p)
|
|
470 dev.off()
|
|
471 } else {
|
|
472 empty <- data.frame()
|
|
473 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
|
|
474
|
|
475 png("singles_reads_scatterplot.png", width=400, height=300)
|
|
476 print(p)
|
|
477 dev.off()
|
|
478
|
|
479 png("singles_freq_scatterplot.png", width=400, height=300)
|
|
480 print(p)
|
|
481 dev.off()
|
|
482 }
|
60
|
483
|
|
484 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
|
|
485 patient.merge.list.second = list()
|
|
486
|
7
|
487 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
|
|
488 onShort = "reads"
|
|
489 if(on == "Frequency"){
|
|
490 onShort = "freq"
|
|
491 }
|
18
|
492 onx = paste(on, ".x", sep="")
|
|
493 ony = paste(on, ".y", sep="")
|
|
494 onz = paste(on, ".z", sep="")
|
7
|
495 type="triplet"
|
|
496
|
|
497 threshholdIndex = which(colnames(product) == "interval")
|
|
498 V_SegmentIndex = which(colnames(product) == "V_Segments")
|
|
499 J_SegmentIndex = which(colnames(product) == "J_Segments")
|
|
500 titleIndex = which(colnames(product) == "Titles")
|
|
501 sampleIndex = which(colnames(patient1) == "Sample")
|
|
502 patientIndex = which(colnames(patient1) == "Patient")
|
|
503 oneSample = paste(patient1[1,sampleIndex], sep="")
|
|
504 twoSample = paste(patient2[1,sampleIndex], sep="")
|
|
505 threeSample = paste(patient3[1,sampleIndex], sep="")
|
60
|
506
|
29
|
507 if(mergeOn == "Clone_Sequence"){
|
|
508 patient1$merge = paste(patient1$Clone_Sequence)
|
|
509 patient2$merge = paste(patient2$Clone_Sequence)
|
|
510 patient3$merge = paste(patient3$Clone_Sequence)
|
|
511
|
|
512 } else {
|
|
513 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
|
|
514 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
|
|
515 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
|
|
516 }
|
60
|
517
|
|
518 #patientMerge = merge(patient1, patient2, by="merge")[NULL,]
|
|
519 patient1.fuzzy = patient1
|
|
520 patient2.fuzzy = patient2
|
|
521 patient3.fuzzy = patient3
|
|
522
|
|
523 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T)
|
|
524
|
|
525 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
|
|
526 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
|
|
527 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J)
|
|
528
|
|
529 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy)
|
|
530
|
|
531 other.sample.list = list()
|
|
532 other.sample.list[[oneSample]] = c(twoSample, threeSample)
|
|
533 other.sample.list[[twoSample]] = c(oneSample, threeSample)
|
|
534 other.sample.list[[threeSample]] = c(oneSample, twoSample)
|
|
535
|
9
|
536 patientMerge = merge(patient1, patient2, by="merge")
|
|
537 patientMerge = merge(patientMerge, patient3, by="merge")
|
28
|
538 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
|
60
|
539 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
|
|
540 patientMerge = patientMerge[NULL,]
|
|
541
|
|
542 duo.merge.list = list()
|
|
543
|
20
|
544 patientMerge12 = merge(patient1, patient2, by="merge")
|
60
|
545 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
|
|
546 patientMerge12 = patientMerge12[NULL,]
|
|
547 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12
|
|
548 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12
|
|
549
|
20
|
550 patientMerge13 = merge(patient1, patient3, by="merge")
|
60
|
551 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
|
|
552 patientMerge13 = patientMerge13[NULL,]
|
|
553 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13
|
|
554 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13
|
|
555
|
20
|
556 patientMerge23 = merge(patient2, patient3, by="merge")
|
60
|
557 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
|
|
558 patientMerge23 = patientMerge23[NULL,]
|
|
559 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23
|
|
560 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23
|
|
561
|
|
562 merge.list = list()
|
|
563 merge.list[["second"]] = vector()
|
|
564
|
|
565 start.time = proc.time()
|
|
566 if(paste(label1, "123") %in% names(patient.merge.list)){
|
|
567 patientMerge = patient.merge.list[[paste(label1, "123")]]
|
|
568 patientMerge12 = patient.merge.list[[paste(label1, "12")]]
|
|
569 patientMerge13 = patient.merge.list[[paste(label1, "13")]]
|
|
570 patientMerge23 = patient.merge.list[[paste(label1, "23")]]
|
|
571
|
|
572 merge.list[["second"]] = patient.merge.list.second[[label1]]
|
|
573
|
|
574 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
|
|
575 } else {
|
|
576 while(nrow(patient.fuzzy) > 0){
|
|
577 first.merge = patient.fuzzy[1,"merge"]
|
|
578 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
|
|
579 first.sample = patient.fuzzy[1,"Sample"]
|
|
580
|
|
581 merge.filter = first.merge == patient.fuzzy$merge
|
|
582
|
|
583 second.sample = other.sample.list[[first.sample]][1]
|
|
584 third.sample = other.sample.list[[first.sample]][2]
|
|
585
|
|
586 sample.filter.1 = first.sample == patient.fuzzy$Sample
|
|
587 sample.filter.2 = second.sample == patient.fuzzy$Sample
|
|
588 sample.filter.3 = third.sample == patient.fuzzy$Sample
|
|
589
|
|
590 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
|
|
591
|
|
592 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter
|
|
593 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter
|
|
594 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter
|
|
595
|
|
596 matches.in.1 = any(match.filter.1)
|
|
597 matches.in.2 = any(match.filter.2)
|
|
598 matches.in.3 = any(match.filter.3)
|
|
599
|
|
600
|
|
601
|
|
602 rows.1 = patient.fuzzy[match.filter.1,]
|
|
603
|
|
604 sum.1 = data.frame(merge = first.clone.sequence,
|
|
605 Patient = label1,
|
|
606 Receptor = rows.1[1,"Receptor"],
|
|
607 Sample = rows.1[1,"Sample"],
|
|
608 Cell_Count = rows.1[1,"Cell_Count"],
|
|
609 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes),
|
|
610 Log10_Frequency = log10(sum(rows.1$Frequency)),
|
|
611 Total_Read_Count = sum(rows.1$Total_Read_Count),
|
|
612 dsPerM = sum(rows.1$dsPerM),
|
|
613 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"],
|
|
614 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"],
|
|
615 Clone_Sequence = first.clone.sequence,
|
|
616 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"],
|
|
617 Related_to_leukemia_clone = F,
|
|
618 Frequency = sum(rows.1$Frequency),
|
|
619 locus_V = rows.1[1,"locus_V"],
|
|
620 locus_J = rows.1[1,"locus_J"],
|
|
621 normalized_read_count = sum(rows.1$normalized_read_count))
|
|
622 sum.2 = sum.1[NULL,]
|
|
623 rows.2 = patient.fuzzy[match.filter.2,]
|
|
624 if(matches.in.2){
|
|
625 sum.2 = data.frame(merge = first.clone.sequence,
|
|
626 Patient = label1,
|
|
627 Receptor = rows.2[1,"Receptor"],
|
|
628 Sample = rows.2[1,"Sample"],
|
|
629 Cell_Count = rows.2[1,"Cell_Count"],
|
|
630 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes),
|
|
631 Log10_Frequency = log10(sum(rows.2$Frequency)),
|
|
632 Total_Read_Count = sum(rows.2$Total_Read_Count),
|
|
633 dsPerM = sum(rows.2$dsPerM),
|
|
634 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"],
|
|
635 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"],
|
|
636 Clone_Sequence = first.clone.sequence,
|
|
637 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"],
|
|
638 Related_to_leukemia_clone = F,
|
|
639 Frequency = sum(rows.2$Frequency),
|
|
640 locus_V = rows.2[1,"locus_V"],
|
|
641 locus_J = rows.2[1,"locus_J"],
|
|
642 normalized_read_count = sum(rows.2$normalized_read_count))
|
|
643 }
|
|
644
|
|
645 sum.3 = sum.1[NULL,]
|
|
646 rows.3 = patient.fuzzy[match.filter.3,]
|
|
647 if(matches.in.3){
|
|
648 sum.3 = data.frame(merge = first.clone.sequence,
|
|
649 Patient = label1,
|
|
650 Receptor = rows.3[1,"Receptor"],
|
|
651 Sample = rows.3[1,"Sample"],
|
|
652 Cell_Count = rows.3[1,"Cell_Count"],
|
|
653 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes),
|
|
654 Log10_Frequency = log10(sum(rows.3$Frequency)),
|
|
655 Total_Read_Count = sum(rows.3$Total_Read_Count),
|
|
656 dsPerM = sum(rows.3$dsPerM),
|
|
657 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"],
|
|
658 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"],
|
|
659 Clone_Sequence = first.clone.sequence,
|
|
660 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"],
|
|
661 Related_to_leukemia_clone = F,
|
|
662 Frequency = sum(rows.3$Frequency),
|
|
663 locus_V = rows.3[1,"locus_V"],
|
|
664 locus_J = rows.3[1,"locus_J"],
|
|
665 normalized_read_count = sum(rows.3$normalized_read_count))
|
|
666 }
|
|
667
|
|
668 if(matches.in.2 & matches.in.3){
|
|
669 merge.123 = merge(sum.1, sum.2, by="merge")
|
|
670 merge.123 = merge(merge.123, sum.3, by="merge")
|
|
671 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="")
|
|
672 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz])
|
|
673
|
|
674 patientMerge = rbind(patientMerge, merge.123)
|
|
675 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),]
|
|
676
|
|
677 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
|
|
678 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
|
|
679
|
|
680 } else if (matches.in.2) {
|
|
681 #other.sample1 = other.sample.list[[first.sample]][1]
|
|
682 #other.sample2 = other.sample.list[[first.sample]][2]
|
|
683
|
|
684 second.sample = sum.2[,"Sample"]
|
|
685
|
|
686 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
|
|
687
|
|
688 merge.12 = merge(sum.1, sum.2, by="merge")
|
|
689
|
|
690 current.merge.list = rbind(current.merge.list, merge.12)
|
|
691 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
|
|
692
|
|
693 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),]
|
|
694
|
|
695 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
|
|
696 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
|
|
697
|
|
698 } else if (matches.in.3) {
|
|
699
|
|
700 #other.sample1 = other.sample.list[[first.sample]][1]
|
|
701 #other.sample2 = other.sample.list[[first.sample]][2]
|
|
702
|
|
703 second.sample = sum.3[,"Sample"]
|
|
704
|
|
705 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
|
|
706
|
|
707 merge.13 = merge(sum.1, sum.3, by="merge")
|
|
708
|
|
709 current.merge.list = rbind(current.merge.list, merge.13)
|
|
710 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
|
|
711
|
|
712 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),]
|
|
713
|
|
714 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
|
|
715 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
|
|
716
|
|
717 } else {
|
|
718 patient.fuzzy = patient.fuzzy[-1,]
|
|
719 }
|
|
720
|
|
721 tmp.rows = rbind(rows.1, rows.2, rows.3)
|
|
722 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
|
|
723
|
|
724 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) {
|
|
725 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
|
|
726 } else {
|
|
727 }
|
|
728
|
|
729 }
|
|
730 patient.merge.list[[paste(label1, "123")]] = patientMerge
|
|
731
|
|
732 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]]
|
|
733 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]]
|
|
734 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]]
|
|
735
|
|
736 patient.merge.list[[paste(label1, "12")]] = patientMerge12
|
|
737 patient.merge.list[[paste(label1, "13")]] = patientMerge13
|
|
738 patient.merge.list[[paste(label1, "23")]] = patientMerge23
|
|
739
|
|
740 patient.merge.list.second[[label1]] = merge.list[["second"]]
|
|
741 }
|
|
742 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
|
|
743 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
|
|
744 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
|
|
745 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
|
20
|
746 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
|
60
|
747
|
|
748 if(F){
|
|
749 patientMerge = merge(patient1, patient2, by="merge")
|
|
750 patientMerge = merge(patientMerge, patient3, by="merge")
|
|
751 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
|
|
752 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
|
|
753 patientMerge12 = merge(patient1, patient2, by="merge")
|
|
754 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
|
|
755 patientMerge13 = merge(patient1, patient3, by="merge")
|
|
756 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
|
|
757 patientMerge23 = merge(patient2, patient3, by="merge")
|
|
758 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
|
|
759 }
|
28
|
760
|
30
|
761 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
|
20
|
762 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
|
30
|
763 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
|
27
|
764 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
|
20
|
765
|
7
|
766 res1 = vector()
|
|
767 res2 = vector()
|
|
768 res3 = vector()
|
20
|
769 res12 = vector()
|
|
770 res13 = vector()
|
|
771 res23 = vector()
|
7
|
772 resAll = vector()
|
|
773 read1Count = vector()
|
|
774 read2Count = vector()
|
|
775 read3Count = vector()
|
|
776
|
|
777 if(appendTriplets){
|
9
|
778 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
|
7
|
779 }
|
|
780 for(iter in 1:length(product[,1])){
|
|
781 threshhold = product[iter,threshholdIndex]
|
|
782 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
|
|
783 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
|
18
|
784 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
|
|
785 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
|
7
|
786
|
30
|
787 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
|
|
788 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
|
|
789 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
|
24
|
790
|
30
|
791 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
|
|
792 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
|
|
793 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
|
20
|
794
|
18
|
795 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
|
|
796 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
|
|
797 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
|
7
|
798 res1 = append(res1, sum(one))
|
|
799 res2 = append(res2, sum(two))
|
|
800 res3 = append(res3, sum(three))
|
|
801 resAll = append(resAll, sum(all))
|
20
|
802 res12 = append(res12, sum(one_two))
|
|
803 res13 = append(res13, sum(one_three))
|
|
804 res23 = append(res23, sum(two_three))
|
7
|
805 #threshhold = 0
|
|
806 if(threshhold != 0){
|
|
807 if(sum(one) > 0){
|
20
|
808 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
809 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
810 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
811 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
812 }
|
|
813 if(sum(two) > 0){
|
20
|
814 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
815 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
816 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
817 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
818 }
|
|
819 if(sum(three) > 0){
|
20
|
820 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
821 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
822 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
823 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
824 }
|
20
|
825 if(sum(one_two) > 0){
|
|
826 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
827 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
|
|
828 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
829 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
830 }
|
|
831 if(sum(one_three) > 0){
|
|
832 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
833 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
|
834 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
835 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
836 }
|
|
837 if(sum(two_three) > 0){
|
|
838 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
839 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
|
840 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
841 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
842 }
|
|
843 } else { #scatterplot data
|
24
|
844 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
|
60
|
845 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
|
30
|
846 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
|
27
|
847 if(sum(in_two) > 0){
|
|
848 scatterplot_locus_data[in_two,]$type = "In two"
|
|
849 }
|
30
|
850 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
|
27
|
851 if(sum(in_three)> 0){
|
|
852 scatterplot_locus_data[in_three,]$type = "In three"
|
|
853 }
|
|
854 not_in_one = scatterplot_locus_data$type != "In one"
|
|
855 if(sum(not_in_one) > 0){
|
|
856 scatterplot_locus_data[not_in_one,]$type = "In multiple"
|
|
857 }
|
20
|
858 p = NULL
|
|
859 if(nrow(scatterplot_locus_data) != 0){
|
|
860 if(on == "normalized_read_count"){
|
31
|
861 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
32
|
862 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
|
20
|
863 } else {
|
32
|
864 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
|
20
|
865 }
|
|
866 p = p + geom_point(aes(colour=type), position="jitter")
|
25
|
867 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
|
20
|
868 } else {
|
|
869 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
|
|
870 }
|
|
871 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
|
|
872 print(p)
|
|
873 dev.off()
|
|
874 }
|
7
|
875 if(sum(all) > 0){
|
20
|
876 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
|
|
877 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
7
|
878 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
879 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
880 }
|
|
881 }
|
20
|
882 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
|
|
883 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
|
7
|
884 colnames(patientResult)[6] = oneSample
|
20
|
885 colnames(patientResult)[7] = twoSample
|
|
886 colnames(patientResult)[8] = threeSample
|
|
887 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
|
|
888 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
|
|
889 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
|
7
|
890
|
|
891 colnamesBak = colnames(patientResult)
|
20
|
892 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
|
7
|
893 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
894 colnames(patientResult) = colnamesBak
|
|
895
|
|
896 patientResult$Locus = factor(patientResult$Locus, Titles)
|
|
897 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
|
|
898
|
|
899 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
|
|
900 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
|
|
901 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
902 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
|
|
903 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
|
|
904 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
905 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
|
|
906 print(plt)
|
|
907 dev.off()
|
|
908
|
|
909 fontSize = 4
|
|
910
|
|
911 bak = patientResult
|
|
912 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
|
|
913 patientResult$relativeValue = patientResult$value * 10
|
|
914 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
|
|
915 plt = ggplot(patientResult)
|
|
916 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
|
|
917 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
918 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
|
|
919 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
|
|
920 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
|
|
921 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
|
|
922 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
|
|
923 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
|
|
924 print(plt)
|
|
925 dev.off()
|
|
926 }
|
|
927
|
33
|
928 if(nrow(triplets) != 0){
|
60
|
929
|
|
930 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T)
|
|
931
|
33
|
932 triplets$uniqueID = "ID"
|
|
933
|
|
934 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
935 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
936 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
937
|
|
938 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
939 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
940 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
941
|
|
942 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
|
60
|
943
|
|
944 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
|
|
945
|
33
|
946 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
|
|
947 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
|
|
948 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
|
|
949
|
|
950 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
|
|
951 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
|
|
952
|
|
953 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
|
|
954
|
|
955 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
|
|
956
|
|
957 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
|
|
958
|
|
959 triplets = triplets[triplets$normalized_read_count >= min_cells,]
|
|
960
|
60
|
961 column_drops = c("min_cell_count", "min_cell_paste")
|
33
|
962
|
|
963 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
|
60
|
964
|
|
965 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
|
|
966
|
33
|
967 interval = intervalReads
|
|
968 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
|
969 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
970
|
|
971 one = triplets[triplets$Sample == "14696_reg_BM",]
|
|
972 two = triplets[triplets$Sample == "24536_reg_BM",]
|
|
973 three = triplets[triplets$Sample == "24062_reg_BM",]
|
60
|
974 #tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T)
|
33
|
975
|
|
976 one = triplets[triplets$Sample == "16278_Left",]
|
|
977 two = triplets[triplets$Sample == "26402_Left",]
|
|
978 three = triplets[triplets$Sample == "26759_Left",]
|
60
|
979 #tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T)
|
33
|
980
|
|
981 one = triplets[triplets$Sample == "16278_Right",]
|
|
982 two = triplets[triplets$Sample == "26402_Right",]
|
|
983 three = triplets[triplets$Sample == "26759_Right",]
|
53
|
984 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T)
|
33
|
985
|
60
|
986 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
|
|
987
|
33
|
988 interval = intervalFreq
|
|
989 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
|
990 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
991
|
|
992 one = triplets[triplets$Sample == "14696_reg_BM",]
|
|
993 two = triplets[triplets$Sample == "24536_reg_BM",]
|
|
994 three = triplets[triplets$Sample == "24062_reg_BM",]
|
60
|
995 #tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F)
|
33
|
996
|
|
997 one = triplets[triplets$Sample == "16278_Left",]
|
|
998 two = triplets[triplets$Sample == "26402_Left",]
|
|
999 three = triplets[triplets$Sample == "26759_Left",]
|
60
|
1000 #tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F)
|
33
|
1001
|
|
1002 one = triplets[triplets$Sample == "16278_Right",]
|
|
1003 two = triplets[triplets$Sample == "26402_Right",]
|
|
1004 three = triplets[triplets$Sample == "26759_Right",]
|
53
|
1005 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F)
|
33
|
1006 } else {
|
|
1007 cat("", file="triplets.txt")
|
|
1008 }
|
60
|
1009 cat("</table></html>", file=logfile, append=T) |