Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 60:28c3695259c1 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 29 Oct 2015 11:21:33 -0400 |
parents | 36e307208f1b |
children | 77a090cd0e02 |
59:36e307208f1b | 60:28c3695259c1 |
---|---|
440 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) | 440 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) |
441 | 441 |
442 interval = intervalFreq | 442 interval = intervalFreq |
443 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 443 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
444 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 444 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
445 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) | 445 lapply(patients[4], FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) |
446 | 446 |
447 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) | 447 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) |
448 | 448 |
449 interval = intervalReads | 449 interval = intervalReads |
450 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 450 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
451 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 451 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
452 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") | 452 lapply(patients[4], FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") |
453 | |
454 cat("</table></html>", file=logfile, append=T) | |
455 | |
456 | 453 |
457 if(nrow(single_patients) > 0){ | 454 if(nrow(single_patients) > 0){ |
458 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) | 455 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) |
459 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000)) | 456 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000)) |
460 p = p + geom_point(aes(colour=type), position="jitter") | 457 p = p + geom_point(aes(colour=type), position="jitter") |
481 | 478 |
482 png("singles_freq_scatterplot.png", width=400, height=300) | 479 png("singles_freq_scatterplot.png", width=400, height=300) |
483 print(p) | 480 print(p) |
484 dev.off() | 481 dev.off() |
485 } | 482 } |
483 | |
484 patient.merge.list = list() # cache the 'both' table; roughly a 2x speedup at the cost of extra memory | 
485 patient.merge.list.second = list() | |
486 | |
486 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ | 487 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ |
487 onShort = "reads" | 488 onShort = "reads" |
488 if(on == "Frequency"){ | 489 if(on == "Frequency"){ |
489 onShort = "freq" | 490 onShort = "freq" |
490 } | 491 } |
500 sampleIndex = which(colnames(patient1) == "Sample") | 501 sampleIndex = which(colnames(patient1) == "Sample") |
501 patientIndex = which(colnames(patient1) == "Patient") | 502 patientIndex = which(colnames(patient1) == "Patient") |
502 oneSample = paste(patient1[1,sampleIndex], sep="") | 503 oneSample = paste(patient1[1,sampleIndex], sep="") |
503 twoSample = paste(patient2[1,sampleIndex], sep="") | 504 twoSample = paste(patient2[1,sampleIndex], sep="") |
504 threeSample = paste(patient3[1,sampleIndex], sep="") | 505 threeSample = paste(patient3[1,sampleIndex], sep="") |
505 | 506 |
506 if(mergeOn == "Clone_Sequence"){ | 507 if(mergeOn == "Clone_Sequence"){ |
507 patient1$merge = paste(patient1$Clone_Sequence) | 508 patient1$merge = paste(patient1$Clone_Sequence) |
508 patient2$merge = paste(patient2$Clone_Sequence) | 509 patient2$merge = paste(patient2$Clone_Sequence) |
509 patient3$merge = paste(patient3$Clone_Sequence) | 510 patient3$merge = paste(patient3$Clone_Sequence) |
510 | 511 |
511 } else { | 512 } else { |
512 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) | 513 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) |
513 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) | 514 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) |
514 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence) | 515 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence) |
515 } | 516 } |
516 | 517 |
518 #patientMerge = merge(patient1, patient2, by="merge")[NULL,] | |
519 patient1.fuzzy = patient1 | |
520 patient2.fuzzy = patient2 | |
521 patient3.fuzzy = patient3 | |
522 | |
523 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T) | |
524 | |
525 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J) | |
526 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J) | |
527 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J) | |
528 | |
529 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy) | |
530 | |
531 other.sample.list = list() | |
532 other.sample.list[[oneSample]] = c(twoSample, threeSample) | |
533 other.sample.list[[twoSample]] = c(oneSample, threeSample) | |
534 other.sample.list[[threeSample]] = c(oneSample, twoSample) | |
535 | |
517 patientMerge = merge(patient1, patient2, by="merge") | 536 patientMerge = merge(patient1, patient2, by="merge") |
518 patientMerge = merge(patientMerge, patient3, by="merge") | 537 patientMerge = merge(patientMerge, patient3, by="merge") |
519 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="") | 538 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="") |
539 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) | |
540 patientMerge = patientMerge[NULL,] | |
541 | |
542 duo.merge.list = list() | |
543 | |
544 patientMerge12 = merge(patient1, patient2, by="merge") | |
545 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) | |
546 patientMerge12 = patientMerge12[NULL,] | |
547 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12 | |
548 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12 | |
549 | |
550 patientMerge13 = merge(patient1, patient3, by="merge") | |
551 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) | |
552 patientMerge13 = patientMerge13[NULL,] | |
553 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13 | |
554 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13 | |
555 | |
556 patientMerge23 = merge(patient2, patient3, by="merge") | |
557 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) | |
558 patientMerge23 = patientMerge23[NULL,] | |
559 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23 | |
560 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23 | |
561 | |
562 merge.list = list() | |
563 merge.list[["second"]] = vector() | |
564 | |
565 start.time = proc.time() | |
566 if(paste(label1, "123") %in% names(patient.merge.list)){ | |
567 patientMerge = patient.merge.list[[paste(label1, "123")]] | |
568 patientMerge12 = patient.merge.list[[paste(label1, "12")]] | |
569 patientMerge13 = patient.merge.list[[paste(label1, "13")]] | |
570 patientMerge23 = patient.merge.list[[paste(label1, "23")]] | |
571 | |
572 merge.list[["second"]] = patient.merge.list.second[[label1]] | |
573 | |
574 cat(paste("<td>", nrow(patient1), " in ", label1, ", ", nrow(patient2), " in ", label2, " and ", nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in all three (fetched from cache)</td></tr>", sep=""), file=logfile, append=T) | 
575 } else { | |
576 while(nrow(patient.fuzzy) > 0){ | |
577 first.merge = patient.fuzzy[1,"merge"] | |
578 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] | |
579 first.sample = patient.fuzzy[1,"Sample"] | |
580 | |
581 merge.filter = first.merge == patient.fuzzy$merge | |
582 | |
583 second.sample = other.sample.list[[first.sample]][1] | |
584 third.sample = other.sample.list[[first.sample]][2] | |
585 | |
586 sample.filter.1 = first.sample == patient.fuzzy$Sample | |
587 sample.filter.2 = second.sample == patient.fuzzy$Sample | |
588 sample.filter.3 = third.sample == patient.fuzzy$Sample | |
589 | |
590 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence) | |
591 | |
592 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter | |
593 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter | |
594 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter | |
595 | |
596 matches.in.1 = any(match.filter.1) | |
597 matches.in.2 = any(match.filter.2) | |
598 matches.in.3 = any(match.filter.3) | |
599 | |
600 | |
601 | |
602 rows.1 = patient.fuzzy[match.filter.1,] | |
603 | |
604 sum.1 = data.frame(merge = first.clone.sequence, | |
605 Patient = label1, | |
606 Receptor = rows.1[1,"Receptor"], | |
607 Sample = rows.1[1,"Sample"], | |
608 Cell_Count = rows.1[1,"Cell_Count"], | |
609 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes), | |
610 Log10_Frequency = log10(sum(rows.1$Frequency)), | |
611 Total_Read_Count = sum(rows.1$Total_Read_Count), | |
612 dsPerM = sum(rows.1$dsPerM), | |
613 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"], | |
614 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"], | |
615 Clone_Sequence = first.clone.sequence, | |
616 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"], | |
617 Related_to_leukemia_clone = F, | |
618 Frequency = sum(rows.1$Frequency), | |
619 locus_V = rows.1[1,"locus_V"], | |
620 locus_J = rows.1[1,"locus_J"], | |
621 normalized_read_count = sum(rows.1$normalized_read_count)) | |
622 sum.2 = sum.1[NULL,] | |
623 rows.2 = patient.fuzzy[match.filter.2,] | |
624 if(matches.in.2){ | |
625 sum.2 = data.frame(merge = first.clone.sequence, | |
626 Patient = label1, | |
627 Receptor = rows.2[1,"Receptor"], | |
628 Sample = rows.2[1,"Sample"], | |
629 Cell_Count = rows.2[1,"Cell_Count"], | |
630 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes), | |
631 Log10_Frequency = log10(sum(rows.2$Frequency)), | |
632 Total_Read_Count = sum(rows.2$Total_Read_Count), | |
633 dsPerM = sum(rows.2$dsPerM), | |
634 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"], | |
635 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"], | |
636 Clone_Sequence = first.clone.sequence, | |
637 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"], | |
638 Related_to_leukemia_clone = F, | |
639 Frequency = sum(rows.2$Frequency), | |
640 locus_V = rows.2[1,"locus_V"], | |
641 locus_J = rows.2[1,"locus_J"], | |
642 normalized_read_count = sum(rows.2$normalized_read_count)) | |
643 } | |
644 | |
645 sum.3 = sum.1[NULL,] | |
646 rows.3 = patient.fuzzy[match.filter.3,] | |
647 if(matches.in.3){ | |
648 sum.3 = data.frame(merge = first.clone.sequence, | |
649 Patient = label1, | |
650 Receptor = rows.3[1,"Receptor"], | |
651 Sample = rows.3[1,"Sample"], | |
652 Cell_Count = rows.3[1,"Cell_Count"], | |
653 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes), | |
654 Log10_Frequency = log10(sum(rows.3$Frequency)), | |
655 Total_Read_Count = sum(rows.3$Total_Read_Count), | |
656 dsPerM = sum(rows.3$dsPerM), | |
657 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"], | |
658 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"], | |
659 Clone_Sequence = first.clone.sequence, | |
660 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"], | |
661 Related_to_leukemia_clone = F, | |
662 Frequency = sum(rows.3$Frequency), | |
663 locus_V = rows.3[1,"locus_V"], | |
664 locus_J = rows.3[1,"locus_J"], | |
665 normalized_read_count = sum(rows.3$normalized_read_count)) | |
666 } | |
667 | |
668 if(matches.in.2 & matches.in.3){ | |
669 merge.123 = merge(sum.1, sum.2, by="merge") | |
670 merge.123 = merge(merge.123, sum.3, by="merge") | |
671 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="") | |
672 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz]) | |
673 | |
674 patientMerge = rbind(patientMerge, merge.123) | |
675 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),] | |
676 | |
677 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) | |
678 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) | |
679 | |
680 } else if (matches.in.2) { | |
681 #other.sample1 = other.sample.list[[first.sample]][1] | |
682 #other.sample2 = other.sample.list[[first.sample]][2] | |
683 | |
684 second.sample = sum.2[,"Sample"] | |
685 | |
686 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]] | |
687 | |
688 merge.12 = merge(sum.1, sum.2, by="merge") | |
689 | |
690 current.merge.list = rbind(current.merge.list, merge.12) | |
691 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list | |
692 | |
693 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),] | |
694 | |
695 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) | |
696 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) | |
697 | |
698 } else if (matches.in.3) { | |
699 | |
700 #other.sample1 = other.sample.list[[first.sample]][1] | |
701 #other.sample2 = other.sample.list[[first.sample]][2] | |
702 | |
703 second.sample = sum.3[,"Sample"] | |
704 | |
705 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]] | |
706 | |
707 merge.13 = merge(sum.1, sum.3, by="merge") | |
708 | |
709 current.merge.list = rbind(current.merge.list, merge.13) | |
710 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list | |
711 | |
712 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),] | |
713 | |
714 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"]) | |
715 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences) | |
716 | |
717 } else { | |
718 patient.fuzzy = patient.fuzzy[-1,] | |
719 } | |
720 | |
721 tmp.rows = rbind(rows.1, rows.2, rows.3) | |
722 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),] | |
723 | |
724 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.3) > 1) { | 
725 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T) | |
726 } else { | |
727 } | |
728 | |
729 } | |
730 patient.merge.list[[paste(label1, "123")]] = patientMerge | |
731 | |
732 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]] | |
733 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]] | |
734 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]] | |
735 | |
736 patient.merge.list[[paste(label1, "12")]] = patientMerge12 | |
737 patient.merge.list[[paste(label1, "13")]] = patientMerge13 | |
738 patient.merge.list[[paste(label1, "23")]] = patientMerge23 | |
739 | |
740 patient.merge.list.second[[label1]] = merge.list[["second"]] | |
741 } | |
742 cat(paste("<td>", nrow(patient1), " in ", label1, ", ", nrow(patient2), " in ", label2, " and ", nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in all three (finding the overlap took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T) | 
520 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) | 743 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) |
521 patientMerge12 = merge(patient1, patient2, by="merge") | |
522 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) | 744 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) |
523 patientMerge13 = merge(patient1, patient3, by="merge") | |
524 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) | 745 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) |
525 patientMerge23 = merge(patient2, patient3, by="merge") | |
526 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) | 746 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) |
527 | 747 |
748 if(F){ | |
749 patientMerge = merge(patient1, patient2, by="merge") | |
750 patientMerge = merge(patientMerge, patient3, by="merge") | |
751 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="") | |
752 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) | |
753 patientMerge12 = merge(patient1, patient2, by="merge") | |
754 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony]) | |
755 patientMerge13 = merge(patient1, patient3, by="merge") | |
756 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) | |
757 patientMerge23 = merge(patient2, patient3, by="merge") | |
758 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) | |
759 } | |
528 | 760 |
529 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge") | 761 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge") |
530 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns]) | 762 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns]) |
531 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),] | 763 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),] |
532 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple")) | 764 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple")) |
608 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="") | 840 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="") |
609 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 841 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
610 } | 842 } |
611 } else { #scatterplot data | 843 } else { #scatterplot data |
612 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] | 844 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] |
845 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),] | |
613 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge) | 846 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge) |
614 if(sum(in_two) > 0){ | 847 if(sum(in_two) > 0){ |
615 scatterplot_locus_data[in_two,]$type = "In two" | 848 scatterplot_locus_data[in_two,]$type = "In two" |
616 } | 849 } |
617 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge) | 850 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge) |
691 print(plt) | 924 print(plt) |
692 dev.off() | 925 dev.off() |
693 } | 926 } |
694 | 927 |
695 if(nrow(triplets) != 0){ | 928 if(nrow(triplets) != 0){ |
929 | |
930 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T) | |
931 | |
696 triplets$uniqueID = "ID" | 932 triplets$uniqueID = "ID" |
697 | 933 |
698 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | 934 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" |
699 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | 935 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" |
700 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | 936 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" |
702 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | 938 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" |
703 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | 939 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" |
704 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | 940 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" |
705 | 941 |
706 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696" | 942 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696" |
707 | 943 |
944 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T) | |
945 | |
708 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4) | 946 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4) |
709 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4) | 947 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4) |
710 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")]) | 948 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")]) |
711 | 949 |
712 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J) | 950 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J) |
718 | 956 |
719 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) # ??? with or without the / 2? | 957 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) # ??? with or without the / 2? |
720 | 958 |
721 triplets = triplets[triplets$normalized_read_count >= min_cells,] | 959 triplets = triplets[triplets$normalized_read_count >= min_cells,] |
722 | 960 |
723 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste") | 961 column_drops = c("min_cell_count", "min_cell_paste") |
724 | 962 |
725 triplets = triplets[,!(colnames(triplets) %in% column_drops)] | 963 triplets = triplets[,!(colnames(triplets) %in% column_drops)] |
726 | 964 |
727 #remove duplicate V+J+CDR3, add together numerical values | 965 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) |
728 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor), | 966 |
729 Cell_Count=unique(.SD$Cell_Count), | |
730 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), | |
731 Total_Read_Count=sum(.SD$Total_Read_Count), | |
732 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0), | |
733 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone), | |
734 Frequency=sum(.SD$Frequency), | |
735 normalized_read_count=sum(.SD$normalized_read_count), | |
736 Log10_Frequency=sum(.SD$Log10_Frequency), | |
737 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")]) | |
738 | |
739 | |
740 interval = intervalReads | 967 interval = intervalReads |
741 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 968 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
742 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 969 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
743 | 970 |
744 one = triplets[triplets$Sample == "14696_reg_BM",] | 971 one = triplets[triplets$Sample == "14696_reg_BM",] |
745 two = triplets[triplets$Sample == "24536_reg_BM",] | 972 two = triplets[triplets$Sample == "24536_reg_BM",] |
746 three = triplets[triplets$Sample == "24062_reg_BM",] | 973 three = triplets[triplets$Sample == "24062_reg_BM",] |
747 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T) | 974 #tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T) |
748 | 975 |
749 one = triplets[triplets$Sample == "16278_Left",] | 976 one = triplets[triplets$Sample == "16278_Left",] |
750 two = triplets[triplets$Sample == "26402_Left",] | 977 two = triplets[triplets$Sample == "26402_Left",] |
751 three = triplets[triplets$Sample == "26759_Left",] | 978 three = triplets[triplets$Sample == "26759_Left",] |
752 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T) | 979 #tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T) |
753 | 980 |
754 one = triplets[triplets$Sample == "16278_Right",] | 981 one = triplets[triplets$Sample == "16278_Right",] |
755 two = triplets[triplets$Sample == "26402_Right",] | 982 two = triplets[triplets$Sample == "26402_Right",] |
756 three = triplets[triplets$Sample == "26759_Right",] | 983 three = triplets[triplets$Sample == "26759_Right",] |
757 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T) | 984 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T) |
758 | 985 |
759 | 986 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) |
987 | |
760 interval = intervalFreq | 988 interval = intervalFreq |
761 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 989 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
762 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 990 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
763 | 991 |
764 one = triplets[triplets$Sample == "14696_reg_BM",] | 992 one = triplets[triplets$Sample == "14696_reg_BM",] |
765 two = triplets[triplets$Sample == "24536_reg_BM",] | 993 two = triplets[triplets$Sample == "24536_reg_BM",] |
766 three = triplets[triplets$Sample == "24062_reg_BM",] | 994 three = triplets[triplets$Sample == "24062_reg_BM",] |
767 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F) | 995 #tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F) |
768 | 996 |
769 one = triplets[triplets$Sample == "16278_Left",] | 997 one = triplets[triplets$Sample == "16278_Left",] |
770 two = triplets[triplets$Sample == "26402_Left",] | 998 two = triplets[triplets$Sample == "26402_Left",] |
771 three = triplets[triplets$Sample == "26759_Left",] | 999 three = triplets[triplets$Sample == "26759_Left",] |
772 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F) | 1000 #tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F) |
773 | 1001 |
774 one = triplets[triplets$Sample == "16278_Right",] | 1002 one = triplets[triplets$Sample == "16278_Right",] |
775 two = triplets[triplets$Sample == "26402_Right",] | 1003 two = triplets[triplets$Sample == "26402_Right",] |
776 three = triplets[triplets$Sample == "26759_Right",] | 1004 three = triplets[triplets$Sample == "26759_Right",] |
777 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F) | 1005 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F) |
778 } else { | 1006 } else { |
779 cat("", file="triplets.txt") | 1007 cat("", file="triplets.txt") |
780 } | 1008 } |
1009 cat("</table></html>", file=logfile, append=T) |
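
For orientation: the new revision normalizes the triplet data (new-file lines 946-957) by scaling each clone's molecule count to the smallest cell count seen for the same patient (uniqueID) and V/J locus, then dropping clones below min_cells. Below is a minimal standalone sketch of that step, not part of the repository: it assumes the same column names as the script and the data.table package; the helper name normalize_triplets and the merge-based join are illustrative only, and the trailing / 2 mirrors the script, which itself flags it as questionable.

```r
# Illustrative only; not part of the repository. Assumes the triplets data
# frame uses the same column names as the script above.
library(data.table)

normalize_triplets <- function(triplets, min_cells = 0) {
  # locus approximated by the first four characters of the major V/J gene
  triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 1, 4)
  triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 1, 4)

  # lowest cell count per patient (uniqueID) and locus combination
  min_cell_count = data.frame(data.table(triplets)[,
    list(min_cell_count = min(Cell_Count)),
    by = c("uniqueID", "locus_V", "locus_J")])

  triplets = merge(triplets, min_cell_count,
                   by = c("uniqueID", "locus_V", "locus_J"))

  # scale molecule counts to the smallest sample within the locus;
  # the "/ 2" mirrors the script, which itself questions whether it belongs
  triplets$normalized_read_count = round(
    triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count *
      triplets$min_cell_count / 2, digits = 2)

  triplets[triplets$normalized_read_count >= min_cells, ]
}
```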