comparison RScript.r @ 60:28c3695259c1 draft

Uploaded
author davidvanzessen
date Thu, 29 Oct 2015 11:21:33 -0400
parents 36e307208f1b
children 77a090cd0e02
comparison
equal deleted inserted replaced
59:36e307208f1b 60:28c3695259c1
440 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) 440 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
441 441
442 interval = intervalFreq 442 interval = intervalFreq
443 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) 443 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
444 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) 444 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
445 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) 445 lapply(patients[4], FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
446 446
447 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) 447 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
448 448
449 interval = intervalReads 449 interval = intervalReads
450 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) 450 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
451 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) 451 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
452 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") 452 lapply(patients[4], FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
453
454 cat("</table></html>", file=logfile, append=T)
455
456 453
457 if(nrow(single_patients) > 0){ 454 if(nrow(single_patients) > 0){
458 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) 455 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
459 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000)) 456 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
460 p = p + geom_point(aes(colour=type), position="jitter") 457 p = p + geom_point(aes(colour=type), position="jitter")
481 478
482 png("singles_freq_scatterplot.png", width=400, height=300) 479 png("singles_freq_scatterplot.png", width=400, height=300)
483 print(p) 480 print(p)
484 dev.off() 481 dev.off()
485 } 482 }
483
484 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
485 patient.merge.list.second = list()
486
486 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ 487 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
487 onShort = "reads" 488 onShort = "reads"
488 if(on == "Frequency"){ 489 if(on == "Frequency"){
489 onShort = "freq" 490 onShort = "freq"
490 } 491 }
500 sampleIndex = which(colnames(patient1) == "Sample") 501 sampleIndex = which(colnames(patient1) == "Sample")
501 patientIndex = which(colnames(patient1) == "Patient") 502 patientIndex = which(colnames(patient1) == "Patient")
502 oneSample = paste(patient1[1,sampleIndex], sep="") 503 oneSample = paste(patient1[1,sampleIndex], sep="")
503 twoSample = paste(patient2[1,sampleIndex], sep="") 504 twoSample = paste(patient2[1,sampleIndex], sep="")
504 threeSample = paste(patient3[1,sampleIndex], sep="") 505 threeSample = paste(patient3[1,sampleIndex], sep="")
505 506
506 if(mergeOn == "Clone_Sequence"){ 507 if(mergeOn == "Clone_Sequence"){
507 patient1$merge = paste(patient1$Clone_Sequence) 508 patient1$merge = paste(patient1$Clone_Sequence)
508 patient2$merge = paste(patient2$Clone_Sequence) 509 patient2$merge = paste(patient2$Clone_Sequence)
509 patient3$merge = paste(patient3$Clone_Sequence) 510 patient3$merge = paste(patient3$Clone_Sequence)
510 511
511 } else { 512 } else {
512 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) 513 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
513 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) 514 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
514 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence) 515 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
515 } 516 }
516 517
518 #patientMerge = merge(patient1, patient2, by="merge")[NULL,]
519 patient1.fuzzy = patient1
520 patient2.fuzzy = patient2
521 patient3.fuzzy = patient3
522
523 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T)
524
525 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
526 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
527 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J)
528
529 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy)
530
531 other.sample.list = list()
532 other.sample.list[[oneSample]] = c(twoSample, threeSample)
533 other.sample.list[[twoSample]] = c(oneSample, threeSample)
534 other.sample.list[[threeSample]] = c(oneSample, twoSample)
535
517 patientMerge = merge(patient1, patient2, by="merge") 536 patientMerge = merge(patient1, patient2, by="merge")
518 patientMerge = merge(patientMerge, patient3, by="merge") 537 patientMerge = merge(patientMerge, patient3, by="merge")
519 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="") 538 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
539 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
540 patientMerge = patientMerge[NULL,]
541
542 duo.merge.list = list()
543
544 patientMerge12 = merge(patient1, patient2, by="merge")
545 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
546 patientMerge12 = patientMerge12[NULL,]
547 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12
548 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12
549
550 patientMerge13 = merge(patient1, patient3, by="merge")
551 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
552 patientMerge13 = patientMerge13[NULL,]
553 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13
554 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13
555
556 patientMerge23 = merge(patient2, patient3, by="merge")
557 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
558 patientMerge23 = patientMerge23[NULL,]
559 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23
560 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23
561
562 merge.list = list()
563 merge.list[["second"]] = vector()
564
565 start.time = proc.time()
566 if(paste(label1, "123") %in% names(patient.merge.list)){
567 patientMerge = patient.merge.list[[paste(label1, "123")]]
568 patientMerge12 = patient.merge.list[[paste(label1, "12")]]
569 patientMerge13 = patient.merge.list[[paste(label1, "13")]]
570 patientMerge23 = patient.merge.list[[paste(label1, "23")]]
571
572 merge.list[["second"]] = patient.merge.list.second[[label1]]
573
574 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
575 } else {
576 while(nrow(patient.fuzzy) > 0){
577 first.merge = patient.fuzzy[1,"merge"]
578 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
579 first.sample = patient.fuzzy[1,"Sample"]
580
581 merge.filter = first.merge == patient.fuzzy$merge
582
583 second.sample = other.sample.list[[first.sample]][1]
584 third.sample = other.sample.list[[first.sample]][2]
585
586 sample.filter.1 = first.sample == patient.fuzzy$Sample
587 sample.filter.2 = second.sample == patient.fuzzy$Sample
588 sample.filter.3 = third.sample == patient.fuzzy$Sample
589
590 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
591
592 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter
593 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter
594 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter
595
596 matches.in.1 = any(match.filter.1)
597 matches.in.2 = any(match.filter.2)
598 matches.in.3 = any(match.filter.3)
599
600
601
602 rows.1 = patient.fuzzy[match.filter.1,]
603
604 sum.1 = data.frame(merge = first.clone.sequence,
605 Patient = label1,
606 Receptor = rows.1[1,"Receptor"],
607 Sample = rows.1[1,"Sample"],
608 Cell_Count = rows.1[1,"Cell_Count"],
609 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes),
610 Log10_Frequency = log10(sum(rows.1$Frequency)),
611 Total_Read_Count = sum(rows.1$Total_Read_Count),
612 dsPerM = sum(rows.1$dsPerM),
613 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"],
614 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"],
615 Clone_Sequence = first.clone.sequence,
616 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"],
617 Related_to_leukemia_clone = F,
618 Frequency = sum(rows.1$Frequency),
619 locus_V = rows.1[1,"locus_V"],
620 locus_J = rows.1[1,"locus_J"],
621 normalized_read_count = sum(rows.1$normalized_read_count))
622 sum.2 = sum.1[NULL,]
623 rows.2 = patient.fuzzy[match.filter.2,]
624 if(matches.in.2){
625 sum.2 = data.frame(merge = first.clone.sequence,
626 Patient = label1,
627 Receptor = rows.2[1,"Receptor"],
628 Sample = rows.2[1,"Sample"],
629 Cell_Count = rows.2[1,"Cell_Count"],
630 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes),
631 Log10_Frequency = log10(sum(rows.2$Frequency)),
632 Total_Read_Count = sum(rows.2$Total_Read_Count),
633 dsPerM = sum(rows.2$dsPerM),
634 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"],
635 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"],
636 Clone_Sequence = first.clone.sequence,
637 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"],
638 Related_to_leukemia_clone = F,
639 Frequency = sum(rows.2$Frequency),
640 locus_V = rows.2[1,"locus_V"],
641 locus_J = rows.2[1,"locus_J"],
642 normalized_read_count = sum(rows.2$normalized_read_count))
643 }
644
645 sum.3 = sum.1[NULL,]
646 rows.3 = patient.fuzzy[match.filter.3,]
647 if(matches.in.3){
648 sum.3 = data.frame(merge = first.clone.sequence,
649 Patient = label1,
650 Receptor = rows.3[1,"Receptor"],
651 Sample = rows.3[1,"Sample"],
652 Cell_Count = rows.3[1,"Cell_Count"],
653 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes),
654 Log10_Frequency = log10(sum(rows.3$Frequency)),
655 Total_Read_Count = sum(rows.3$Total_Read_Count),
656 dsPerM = sum(rows.3$dsPerM),
657 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"],
658 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"],
659 Clone_Sequence = first.clone.sequence,
660 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"],
661 Related_to_leukemia_clone = F,
662 Frequency = sum(rows.3$Frequency),
663 locus_V = rows.3[1,"locus_V"],
664 locus_J = rows.3[1,"locus_J"],
665 normalized_read_count = sum(rows.3$normalized_read_count))
666 }
667
668 if(matches.in.2 & matches.in.3){
669 merge.123 = merge(sum.1, sum.2, by="merge")
670 merge.123 = merge(merge.123, sum.3, by="merge")
671 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="")
672 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz])
673
674 patientMerge = rbind(patientMerge, merge.123)
675 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),]
676
677 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
678 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
679
680 } else if (matches.in.2) {
681 #other.sample1 = other.sample.list[[first.sample]][1]
682 #other.sample2 = other.sample.list[[first.sample]][2]
683
684 second.sample = sum.2[,"Sample"]
685
686 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
687
688 merge.12 = merge(sum.1, sum.2, by="merge")
689
690 current.merge.list = rbind(current.merge.list, merge.12)
691 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
692
693 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),]
694
695 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
696 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
697
698 } else if (matches.in.3) {
699
700 #other.sample1 = other.sample.list[[first.sample]][1]
701 #other.sample2 = other.sample.list[[first.sample]][2]
702
703 second.sample = sum.3[,"Sample"]
704
705 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
706
707 merge.13 = merge(sum.1, sum.3, by="merge")
708
709 current.merge.list = rbind(current.merge.list, merge.13)
710 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
711
712 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),]
713
714 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
715 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
716
717 } else {
718 patient.fuzzy = patient.fuzzy[-1,]
719 }
720
721 tmp.rows = rbind(rows.1, rows.2, rows.3)
722 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
723
724 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) {
725 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
726 } else {
727 }
728
729 }
730 patient.merge.list[[paste(label1, "123")]] = patientMerge
731
732 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]]
733 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]]
734 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]]
735
736 patient.merge.list[[paste(label1, "12")]] = patientMerge12
737 patient.merge.list[[paste(label1, "13")]] = patientMerge13
738 patient.merge.list[[paste(label1, "23")]] = patientMerge23
739
740 patient.merge.list.second[[label1]] = merge.list[["second"]]
741 }
742 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
520 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) 743 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
521 patientMerge12 = merge(patient1, patient2, by="merge")
522 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) 744 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
523 patientMerge13 = merge(patient1, patient3, by="merge")
524 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) 745 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
525 patientMerge23 = merge(patient2, patient3, by="merge")
526 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) 746 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
527 747
748 if(F){
749 patientMerge = merge(patient1, patient2, by="merge")
750 patientMerge = merge(patientMerge, patient3, by="merge")
751 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
752 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
753 patientMerge12 = merge(patient1, patient2, by="merge")
754 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
755 patientMerge13 = merge(patient1, patient3, by="merge")
756 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
757 patientMerge23 = merge(patient2, patient3, by="merge")
758 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
759 }
528 760
529 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge") 761 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
530 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns]) 762 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
531 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),] 763 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
532 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple")) 764 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
608 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="") 840 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
609 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 841 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
610 } 842 }
611 } else { #scatterplot data 843 } else { #scatterplot data
612 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] 844 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
845 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
613 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge) 846 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
614 if(sum(in_two) > 0){ 847 if(sum(in_two) > 0){
615 scatterplot_locus_data[in_two,]$type = "In two" 848 scatterplot_locus_data[in_two,]$type = "In two"
616 } 849 }
617 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge) 850 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
691 print(plt) 924 print(plt)
692 dev.off() 925 dev.off()
693 } 926 }
694 927
695 if(nrow(triplets) != 0){ 928 if(nrow(triplets) != 0){
929
930 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T)
931
696 triplets$uniqueID = "ID" 932 triplets$uniqueID = "ID"
697 933
698 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" 934 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
699 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" 935 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
700 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" 936 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
702 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" 938 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
703 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" 939 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
704 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" 940 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
705 941
706 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696" 942 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
707 943
944 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
945
708 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4) 946 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
709 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4) 947 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
710 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")]) 948 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
711 949
712 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J) 950 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
718 956
719 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2 957 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
720 958
721 triplets = triplets[triplets$normalized_read_count >= min_cells,] 959 triplets = triplets[triplets$normalized_read_count >= min_cells,]
722 960
723 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste") 961 column_drops = c("min_cell_count", "min_cell_paste")
724 962
725 triplets = triplets[,!(colnames(triplets) %in% column_drops)] 963 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
726 964
727 #remove duplicate V+J+CDR3, add together numerical values 965 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
728 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor), 966
729 Cell_Count=unique(.SD$Cell_Count),
730 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
731 Total_Read_Count=sum(.SD$Total_Read_Count),
732 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
733 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
734 Frequency=sum(.SD$Frequency),
735 normalized_read_count=sum(.SD$normalized_read_count),
736 Log10_Frequency=sum(.SD$Log10_Frequency),
737 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
738
739
740 interval = intervalReads 967 interval = intervalReads
741 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) 968 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
742 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) 969 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
743 970
744 one = triplets[triplets$Sample == "14696_reg_BM",] 971 one = triplets[triplets$Sample == "14696_reg_BM",]
745 two = triplets[triplets$Sample == "24536_reg_BM",] 972 two = triplets[triplets$Sample == "24536_reg_BM",]
746 three = triplets[triplets$Sample == "24062_reg_BM",] 973 three = triplets[triplets$Sample == "24062_reg_BM",]
747 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T) 974 #tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T)
748 975
749 one = triplets[triplets$Sample == "16278_Left",] 976 one = triplets[triplets$Sample == "16278_Left",]
750 two = triplets[triplets$Sample == "26402_Left",] 977 two = triplets[triplets$Sample == "26402_Left",]
751 three = triplets[triplets$Sample == "26759_Left",] 978 three = triplets[triplets$Sample == "26759_Left",]
752 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T) 979 #tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T)
753 980
754 one = triplets[triplets$Sample == "16278_Right",] 981 one = triplets[triplets$Sample == "16278_Right",]
755 two = triplets[triplets$Sample == "26402_Right",] 982 two = triplets[triplets$Sample == "26402_Right",]
756 three = triplets[triplets$Sample == "26759_Right",] 983 three = triplets[triplets$Sample == "26759_Right",]
757 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T) 984 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T)
758 985
759 986 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
987
760 interval = intervalFreq 988 interval = intervalFreq
761 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) 989 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
762 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) 990 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
763 991
764 one = triplets[triplets$Sample == "14696_reg_BM",] 992 one = triplets[triplets$Sample == "14696_reg_BM",]
765 two = triplets[triplets$Sample == "24536_reg_BM",] 993 two = triplets[triplets$Sample == "24536_reg_BM",]
766 three = triplets[triplets$Sample == "24062_reg_BM",] 994 three = triplets[triplets$Sample == "24062_reg_BM",]
767 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F) 995 #tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F)
768 996
769 one = triplets[triplets$Sample == "16278_Left",] 997 one = triplets[triplets$Sample == "16278_Left",]
770 two = triplets[triplets$Sample == "26402_Left",] 998 two = triplets[triplets$Sample == "26402_Left",]
771 three = triplets[triplets$Sample == "26759_Left",] 999 three = triplets[triplets$Sample == "26759_Left",]
772 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F) 1000 #tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F)
773 1001
774 one = triplets[triplets$Sample == "16278_Right",] 1002 one = triplets[triplets$Sample == "16278_Right",]
775 two = triplets[triplets$Sample == "26402_Right",] 1003 two = triplets[triplets$Sample == "26402_Right",]
776 three = triplets[triplets$Sample == "26759_Right",] 1004 three = triplets[triplets$Sample == "26759_Right",]
777 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F) 1005 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F)
778 } else { 1006 } else {
779 cat("", file="triplets.txt") 1007 cat("", file="triplets.txt")
780 } 1008 }
1009 cat("</table></html>", file=logfile, append=T)