Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 33:642b4593f0a4 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 24 Jul 2015 05:33:02 -0400 |
parents | dde5ec847549 |
children | 37d9074ef2c6 |
comparison
equal
deleted
inserted
replaced
32:dde5ec847549 | 33:642b4593f0a4 |
---|---|
286 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 286 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
287 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") | 287 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") |
288 | 288 |
289 cat("</table></html>", file=logfile, append=T) | 289 cat("</table></html>", file=logfile, append=T) |
290 | 290 |
291 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) | 291 |
292 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000)) | 292 if(nrow(single_patients) > 0){ |
293 p = p + geom_point(aes(colour=type), position="jitter") | 293 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) |
294 p = p + xlab("In one or both samples") + ylab("Reads") | 294 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000)) |
295 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample") | 295 p = p + geom_point(aes(colour=type), position="jitter") |
296 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080) | 296 p = p + xlab("In one or both samples") + ylab("Reads") |
297 print(p) | 297 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample") |
298 dev.off() | 298 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080) |
299 | 299 print(p) |
300 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) | 300 dev.off() |
301 p = p + geom_point(aes(colour=type), position="jitter") | 301 |
302 p = p + xlab("In one or both samples") + ylab("Frequency") | 302 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) |
303 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample") | 303 p = p + geom_point(aes(colour=type), position="jitter") |
304 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080) | 304 p = p + xlab("In one or both samples") + ylab("Frequency") |
305 print(p) | 305 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample") |
306 dev.off() | 306 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080) |
307 | 307 print(p) |
308 dev.off() | |
309 } else { | |
310 empty <- data.frame() | |
311 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample") | |
312 | |
313 png("singles_reads_scatterplot.png", width=400, height=300) | |
314 print(p) | |
315 dev.off() | |
316 | |
317 png("singles_freq_scatterplot.png", width=400, height=300) | |
318 print(p) | |
319 dev.off() | |
320 } | |
308 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ | 321 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ |
309 onShort = "reads" | 322 onShort = "reads" |
310 if(on == "Frequency"){ | 323 if(on == "Frequency"){ |
311 onShort = "freq" | 324 onShort = "freq" |
312 } | 325 } |
512 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080) | 525 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080) |
513 print(plt) | 526 print(plt) |
514 dev.off() | 527 dev.off() |
515 } | 528 } |
516 | 529 |
517 triplets$uniqueID = "ID" | 530 if(nrow(triplets) != 0){ |
518 | 531 triplets$uniqueID = "ID" |
519 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | 532 |
520 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | 533 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" |
521 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | 534 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" |
522 | 535 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" |
523 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | 536 |
524 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | 537 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" |
525 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | 538 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" |
526 | 539 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" |
527 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696" | 540 |
528 | 541 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696" |
529 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4) | 542 |
530 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4) | 543 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4) |
531 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")]) | 544 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4) |
532 | 545 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")]) |
533 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J) | 546 |
534 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J) | 547 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J) |
535 | 548 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J) |
536 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")] | 549 |
537 | 550 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")] |
538 triplets = merge(triplets, min_cell_count, by="min_cell_paste") | 551 |
539 | 552 triplets = merge(triplets, min_cell_count, by="min_cell_paste") |
540 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2 | 553 |
541 | 554 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2 |
542 triplets = triplets[triplets$normalized_read_count >= min_cells,] | 555 |
543 | 556 triplets = triplets[triplets$normalized_read_count >= min_cells,] |
544 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste") | 557 |
545 | 558 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste") |
546 triplets = triplets[,!(colnames(triplets) %in% column_drops)] | 559 |
547 | 560 triplets = triplets[,!(colnames(triplets) %in% column_drops)] |
548 #remove duplicate V+J+CDR3, add together numerical values | 561 |
549 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor), | 562 #remove duplicate V+J+CDR3, add together numerical values |
550 Cell_Count=unique(.SD$Cell_Count), | 563 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor), |
551 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), | 564 Cell_Count=unique(.SD$Cell_Count), |
552 Total_Read_Count=sum(.SD$Total_Read_Count), | 565 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), |
553 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0), | 566 Total_Read_Count=sum(.SD$Total_Read_Count), |
554 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone), | 567 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0), |
555 Frequency=sum(.SD$Frequency), | 568 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone), |
556 normalized_read_count=sum(.SD$normalized_read_count), | 569 Frequency=sum(.SD$Frequency), |
557 Log10_Frequency=sum(.SD$Log10_Frequency), | 570 normalized_read_count=sum(.SD$normalized_read_count), |
558 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")]) | 571 Log10_Frequency=sum(.SD$Log10_Frequency), |
559 | 572 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")]) |
560 | 573 |
561 interval = intervalReads | 574 |
562 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 575 interval = intervalReads |
563 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 576 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
564 | 577 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
565 one = triplets[triplets$Sample == "14696_reg_BM",] | 578 |
566 two = triplets[triplets$Sample == "24536_reg_BM",] | 579 one = triplets[triplets$Sample == "14696_reg_BM",] |
567 three = triplets[triplets$Sample == "24062_reg_BM",] | 580 two = triplets[triplets$Sample == "24536_reg_BM",] |
568 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T) | 581 three = triplets[triplets$Sample == "24062_reg_BM",] |
569 | 582 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T) |
570 one = triplets[triplets$Sample == "16278_Left",] | 583 |
571 two = triplets[triplets$Sample == "26402_Left",] | 584 one = triplets[triplets$Sample == "16278_Left",] |
572 three = triplets[triplets$Sample == "26759_Left",] | 585 two = triplets[triplets$Sample == "26402_Left",] |
573 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T) | 586 three = triplets[triplets$Sample == "26759_Left",] |
574 | 587 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T) |
575 one = triplets[triplets$Sample == "16278_Right",] | 588 |
576 two = triplets[triplets$Sample == "26402_Right",] | 589 one = triplets[triplets$Sample == "16278_Right",] |
577 three = triplets[triplets$Sample == "26759_Right",] | 590 two = triplets[triplets$Sample == "26402_Right",] |
578 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T) | 591 three = triplets[triplets$Sample == "26759_Right",] |
579 | 592 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T) |
580 | 593 |
581 interval = intervalFreq | 594 |
582 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 595 interval = intervalFreq |
583 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 596 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
584 | 597 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
585 one = triplets[triplets$Sample == "14696_reg_BM",] | 598 |
586 two = triplets[triplets$Sample == "24536_reg_BM",] | 599 one = triplets[triplets$Sample == "14696_reg_BM",] |
587 three = triplets[triplets$Sample == "24062_reg_BM",] | 600 two = triplets[triplets$Sample == "24536_reg_BM",] |
588 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F) | 601 three = triplets[triplets$Sample == "24062_reg_BM",] |
589 | 602 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F) |
590 one = triplets[triplets$Sample == "16278_Left",] | 603 |
591 two = triplets[triplets$Sample == "26402_Left",] | 604 one = triplets[triplets$Sample == "16278_Left",] |
592 three = triplets[triplets$Sample == "26759_Left",] | 605 two = triplets[triplets$Sample == "26402_Left",] |
593 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F) | 606 three = triplets[triplets$Sample == "26759_Left",] |
594 | 607 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F) |
595 one = triplets[triplets$Sample == "16278_Right",] | 608 |
596 two = triplets[triplets$Sample == "26402_Right",] | 609 one = triplets[triplets$Sample == "16278_Right",] |
597 three = triplets[triplets$Sample == "26759_Right",] | 610 two = triplets[triplets$Sample == "26402_Right",] |
598 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F) | 611 three = triplets[triplets$Sample == "26759_Right",] |
612 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F) | |
613 } else { | |
614 cat("", file="triplets.txt") | |
615 } |