Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 68:ef13f0a3f4d6 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 05 Jan 2016 10:49:40 -0500 |
parents | 40c72b9ffc79 |
children | c532b3f8dc97 |
comparison
equal
deleted
inserted
replaced
67:40c72b9ffc79 | 68:ef13f0a3f4d6 |
---|---|
1 args <- commandArgs(trailingOnly = TRUE) | 1 args <- commandArgs(trailingOnly = TRUE) |
2 options(scipen=999) | |
2 | 3 |
3 inFile = args[1] | 4 inFile = args[1] |
4 outDir = args[2] | 5 outDir = args[2] |
5 logfile = args[3] | 6 logfile = args[3] |
6 min_freq = as.numeric(args[4]) | 7 min_freq = as.numeric(args[4]) |
65 | 66 |
66 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0)) | 67 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0)) |
67 | 68 |
68 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory... | 69 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory... |
69 patient.merge.list.second = list() | 70 patient.merge.list.second = list() |
71 scatter_locus_data_list = list() | |
70 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T) | 72 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T) |
71 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T) | 73 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T) |
72 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){ | 74 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){ |
73 if (!is.data.frame(x) & is.list(x)){ | 75 if (!is.data.frame(x) & is.list(x)){ |
74 x = x[[1]] | 76 x = x[[1]] |
123 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) | 125 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) |
124 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) | 126 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) |
125 } | 127 } |
126 | 128 |
127 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge") | 129 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge") |
128 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns]) | 130 #scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns]) |
129 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),] | 131 scatterplot_data = patient1[NULL,scatterplot_data_columns] |
130 scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both")) | 132 #scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),] |
131 scatterplot_data$on = onShort | 133 #scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both")) |
134 scatterplot.data.type.factor = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both")) | |
135 #scatterplot_data$type = factor(x=NULL, levels=scatterplot.data.type.factor) | |
136 scatterplot_data$type = character(0) | |
137 scatterplot_data$link = numeric(0) | |
138 scatterplot_data$on = character(0) | |
132 | 139 |
133 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy' | 140 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy' |
134 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh | 141 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh |
135 | 142 |
136 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence | 143 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence |
139 merge.list = c() | 146 merge.list = c() |
140 | 147 |
141 if(patient %in% names(patient.merge.list)){ | 148 if(patient %in% names(patient.merge.list)){ |
142 patientMerge = patient.merge.list[[patient]] | 149 patientMerge = patient.merge.list[[patient]] |
143 merge.list[["second"]] = patient.merge.list.second[[patient]] | 150 merge.list[["second"]] = patient.merge.list.second[[patient]] |
151 scatterplot_data = scatter_locus_data_list[[patient]] | |
144 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T) | 152 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T) |
145 | 153 |
146 print(names(patient.merge.list)) | 154 print(names(patient.merge.list)) |
147 } else { | 155 } else { |
148 #fuzzy matching here... | 156 #fuzzy matching here... |
173 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),] | 181 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),] |
174 | 182 |
175 merge.list = list() | 183 merge.list = list() |
176 | 184 |
177 merge.list[["second"]] = vector() | 185 merge.list[["second"]] = vector() |
178 | 186 |
187 link.count = 1 | |
188 | |
179 while(nrow(patient.fuzzy) > 1){ | 189 while(nrow(patient.fuzzy) > 1){ |
180 first.merge = patient.fuzzy[1,"merge"] | 190 first.merge = patient.fuzzy[1,"merge"] |
181 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] | 191 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] |
182 first.sample = patient.fuzzy[1,"Sample"] | 192 first.sample = patient.fuzzy[1,"Sample"] |
183 merge.filter = first.merge == patient.fuzzy$merge | 193 merge.filter = first.merge == patient.fuzzy$merge |
262 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) | 272 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) |
263 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) | 273 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) |
264 | 274 |
265 tmp.rows = rbind(first.rows, second.rows) | 275 tmp.rows = rbind(first.rows, second.rows) |
266 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),] | 276 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),] |
267 | 277 |
278 | |
279 #add to the scatterplot data | |
280 scatterplot.row = first.sum[,scatterplot_data_columns] | |
281 scatterplot.row$type = paste(first.sum[,"Sample"], "In Both") | |
282 scatterplot.row$link = link.count | |
283 scatterplot.row$on = onShort | |
284 | |
285 scatterplot_data = rbind(scatterplot_data, scatterplot.row) | |
286 | |
287 scatterplot.row = second.sum[,scatterplot_data_columns] | |
288 scatterplot.row$type = paste(second.sum[,"Sample"], "In Both") | |
289 scatterplot.row$link = link.count | |
290 scatterplot.row$on = onShort | |
291 | |
292 scatterplot_data = rbind(scatterplot_data, scatterplot.row) | |
293 | |
294 #write some information about the match to a log file | |
268 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) { | 295 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) { |
269 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T) | 296 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T) |
270 } else { | 297 } else { |
271 second.clone.sequence = second.rows[1,"Clone_Sequence"] | 298 second.clone.sequence = second.rows[1,"Clone_Sequence"] |
272 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){ | 299 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){ |
287 | 314 |
288 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"]) | 315 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"]) |
289 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) | 316 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) |
290 | 317 |
291 patient.fuzzy = patient.fuzzy[-first.match.filter,] | 318 patient.fuzzy = patient.fuzzy[-first.match.filter,] |
319 | |
320 #add to the scatterplot data | |
321 scatterplot.row = first.sum[,scatterplot_data_columns] | |
322 scatterplot.row$type = first.sum[,"Sample"] | |
323 scatterplot.row$link = link.count | |
324 scatterplot.row$on = onShort | |
325 | |
326 scatterplot_data = rbind(scatterplot_data, scatterplot.row) | |
292 | 327 |
293 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T) | 328 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T) |
294 } else { | 329 } else { |
295 patient.fuzzy = patient.fuzzy[-1,] | 330 patient.fuzzy = patient.fuzzy[-1,] |
296 } | 331 |
332 #add to the scatterplot data | |
333 scatterplot.row = first.sum[,scatterplot_data_columns] | |
334 scatterplot.row$type = first.sum[,"Sample"] | |
335 scatterplot.row$link = link.count | |
336 scatterplot.row$on = onShort | |
337 | |
338 scatterplot_data = rbind(scatterplot_data, scatterplot.row) | |
339 } | |
340 link.count = link.count + 1 | |
297 } | 341 } |
298 patient.merge.list[[patient]] <<- patientMerge | 342 patient.merge.list[[patient]] <<- patientMerge |
299 patient.merge.list.second[[patient]] <<- merge.list[["second"]] | 343 patient.merge.list.second[[patient]] <<- merge.list[["second"]] |
344 scatter_locus_data_list[[patient]] <<- scatterplot_data | |
300 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T) | 345 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T) |
301 } | 346 } |
302 | 347 |
303 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),] | 348 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),] |
304 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),] | 349 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),] |
305 | 350 |
306 | 351 |
307 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) | 352 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) |
353 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony]) | |
308 res1 = vector() | 354 res1 = vector() |
309 res2 = vector() | 355 res2 = vector() |
310 resBoth = vector() | 356 resBoth = vector() |
311 read1Count = vector() | 357 read1Count = vector() |
312 read2Count = vector() | 358 read2Count = vector() |
344 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 390 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
345 } | 391 } |
346 } else { | 392 } else { |
347 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] | 393 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] |
348 #scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[[twoSample]]),] | 394 #scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[[twoSample]]),] |
349 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),] | 395 #scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),] |
350 if(nrow(scatterplot_locus_data) > 0){ | 396 if(nrow(scatterplot_locus_data) > 0){ |
351 scatterplot_locus_data$Rearrangement = product[iter, titleIndex] | 397 scatterplot_locus_data$Rearrangement = product[iter, titleIndex] |
352 } | 398 } |
353 in_one = (scatterplot_locus_data$merge %in% patient1$merge) | 399 |
354 in_two = (scatterplot_locus_data$merge %in% patient2$merge) | 400 |
355 if(any(in_two)){ | 401 |
356 scatterplot_locus_data[in_two,]$type = twoSample | 402 #in_one = (scatterplot_locus_data$merge %in% patient1$merge) |
357 } | 403 #in_two = (scatterplot_locus_data$merge %in% patient2$merge) |
358 in_both = (scatterplot_locus_data$merge %in% patientMerge$merge) | 404 #if(any(in_two)){ |
359 #merge.list.filter = (scatterplot_locus_data$merge %in% merge.list[[oneSample]]) | 405 # scatterplot_locus_data[in_two,]$type = twoSample |
360 #exact.matches.filter = (scatterplot_locus_data$merge %in% cs.exact.matches) | 406 #} |
361 if(any(in_both)){ | 407 #in_both = (scatterplot_locus_data$merge %in% patientMerge$merge) |
362 scatterplot_locus_data[in_both,]$type = "In Both" | 408 ##merge.list.filter = (scatterplot_locus_data$merge %in% merge.list[[oneSample]]) |
363 } | 409 ##exact.matches.filter = (scatterplot_locus_data$merge %in% cs.exact.matches) |
364 if(type == "single"){ | 410 #if(any(in_both)){ |
365 single_patients <<- rbind(single_patients, scatterplot_locus_data) | 411 # scatterplot_locus_data[in_both,]$type = "In Both" |
366 } | 412 #} |
413 #if(type == "single" & (nrow(scatterplot_locus_data) > 0 | !any(scatterplot_locus_data$Patient %in% single_patients$Patient))){ | |
414 # single_patients <<- rbind(single_patients, scatterplot_locus_data) | |
415 #} | |
416 | |
417 | |
367 p = NULL | 418 p = NULL |
419 print(paste("nrow scatterplot_locus_data", nrow(scatterplot_locus_data))) | |
368 if(nrow(scatterplot_locus_data) != 0){ | 420 if(nrow(scatterplot_locus_data) != 0){ |
369 if(on == "normalized_read_count"){ | 421 if(on == "normalized_read_count"){ |
370 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) | 422 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) |
371 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=10^6) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE) | 423 #p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=10^6) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE) |
424 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count, group=link)) + geom_line() + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,10^6)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE) | |
372 } else { | 425 } else { |
373 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE) | 426 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE) |
427 p = ggplot(scatterplot_locus_data, aes(type, Frequency, group=link)) + geom_line() + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE) | |
374 } | 428 } |
375 p = p + geom_point(aes(colour=type), position="jitter") | 429 #p = p + geom_point(aes(colour=type), position="jitter") |
430 p = p + geom_point(aes(colour=type), position="dodge") | |
376 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex])) | 431 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex])) |
377 } else { | 432 } else { |
378 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex])) | 433 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex])) |
379 } | 434 } |
380 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep="")) | 435 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep="")) |
451 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 506 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
452 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") | 507 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") |
453 | 508 |
454 if(nrow(single_patients) > 0){ | 509 if(nrow(single_patients) > 0){ |
455 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) | 510 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) |
456 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000)) | 511 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000)) |
457 p = p + geom_point(aes(colour=type), position="jitter") | 512 p = p + geom_point(aes(colour=type), position="jitter") |
458 p = p + xlab("In one or both samples") + ylab("Reads") | 513 p = p + xlab("In one or both samples") + ylab("Reads") |
459 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample") | 514 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample") |
460 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080) | 515 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080) |
461 print(p) | 516 print(p) |
462 dev.off() | 517 dev.off() |
463 | 518 |
464 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) | 519 #p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) |
520 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100)) | |
465 p = p + geom_point(aes(colour=type), position="jitter") | 521 p = p + geom_point(aes(colour=type), position="jitter") |
466 p = p + xlab("In one or both samples") + ylab("Frequency") | 522 p = p + xlab("In one or both samples") + ylab("Frequency") |
467 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample") | 523 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample") |
468 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080) | 524 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080) |
469 print(p) | 525 print(p) |
760 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) | 816 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) |
761 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) | 817 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) |
762 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) | 818 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) |
763 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) | 819 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) |
764 | 820 |
821 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) | |
822 #patientMerge12$thresholdValue = pmin(patientMerge12[,onx], patientMerge12[,ony]) | |
823 #patientMerge13$thresholdValue = pmin(patientMerge13[,onx], patientMerge13[,ony]) | |
824 #patientMerge23$thresholdValue = pmin(patientMerge23[,onx], patientMerge23[,ony]) | |
825 | |
765 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),] | 826 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),] |
766 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),] | 827 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),] |
767 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),] | 828 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),] |
768 | 829 |
769 if(F){ | 830 if(F){ |
880 if(nrow(scatterplot_locus_data) != 0){ | 941 if(nrow(scatterplot_locus_data) != 0){ |
881 if(on == "normalized_read_count"){ | 942 if(on == "normalized_read_count"){ |
882 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) | 943 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) |
883 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000)) | 944 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000)) |
884 } else { | 945 } else { |
885 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) | 946 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100)) |
947 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) | |
886 } | 948 } |
887 p = p + geom_point(aes(colour=type), position="jitter") | 949 p = p + geom_point(aes(colour=type), position="jitter") |
888 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) | 950 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) |
889 } else { | 951 } else { |
890 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) | 952 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex])) |