comparison RScript.r @ 33:642b4593f0a4 draft

Uploaded
author davidvanzessen
date Fri, 24 Jul 2015 05:33:02 -0400
parents dde5ec847549
children 37d9074ef2c6
comparison
equal deleted inserted replaced
32:dde5ec847549 33:642b4593f0a4
286 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) 286 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
287 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") 287 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
288 288
289 cat("</table></html>", file=logfile, append=T) 289 cat("</table></html>", file=logfile, append=T)
290 290
291 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) 291
292 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000)) 292 if(nrow(single_patients) > 0){
293 p = p + geom_point(aes(colour=type), position="jitter") 293 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
294 p = p + xlab("In one or both samples") + ylab("Reads") 294 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
295 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample") 295 p = p + geom_point(aes(colour=type), position="jitter")
296 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080) 296 p = p + xlab("In one or both samples") + ylab("Reads")
297 print(p) 297 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
298 dev.off() 298 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
299 299 print(p)
300 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) 300 dev.off()
301 p = p + geom_point(aes(colour=type), position="jitter") 301
302 p = p + xlab("In one or both samples") + ylab("Frequency") 302 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
303 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample") 303 p = p + geom_point(aes(colour=type), position="jitter")
304 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080) 304 p = p + xlab("In one or both samples") + ylab("Frequency")
305 print(p) 305 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
306 dev.off() 306 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
307 307 print(p)
308 dev.off()
309 } else {
310 empty <- data.frame()
311 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
312
313 png("singles_reads_scatterplot.png", width=400, height=300)
314 print(p)
315 dev.off()
316
317 png("singles_freq_scatterplot.png", width=400, height=300)
318 print(p)
319 dev.off()
320 }
308 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ 321 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
309 onShort = "reads" 322 onShort = "reads"
310 if(on == "Frequency"){ 323 if(on == "Frequency"){
311 onShort = "freq" 324 onShort = "freq"
312 } 325 }
512 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080) 525 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
513 print(plt) 526 print(plt)
514 dev.off() 527 dev.off()
515 } 528 }
516 529
517 triplets$uniqueID = "ID" 530 if(nrow(triplets) != 0){
518 531 triplets$uniqueID = "ID"
519 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" 532
520 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" 533 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
521 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" 534 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
522 535 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
523 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" 536
524 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" 537 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
525 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" 538 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
526 539 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
527 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696" 540
528 541 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
529 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4) 542
530 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4) 543 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
531 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")]) 544 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
532 545 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
533 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J) 546
534 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J) 547 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
535 548 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
536 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")] 549
537 550 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
538 triplets = merge(triplets, min_cell_count, by="min_cell_paste") 551
539 552 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
540 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2 553
541 554 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
542 triplets = triplets[triplets$normalized_read_count >= min_cells,] 555
543 556 triplets = triplets[triplets$normalized_read_count >= min_cells,]
544 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste") 557
545 558 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
546 triplets = triplets[,!(colnames(triplets) %in% column_drops)] 559
547 560 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
548 #remove duplicate V+J+CDR3, add together numerical values 561
549 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor), 562 #remove duplicate V+J+CDR3, add together numerical values
550 Cell_Count=unique(.SD$Cell_Count), 563 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor),
551 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), 564 Cell_Count=unique(.SD$Cell_Count),
552 Total_Read_Count=sum(.SD$Total_Read_Count), 565 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
553 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0), 566 Total_Read_Count=sum(.SD$Total_Read_Count),
554 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone), 567 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
555 Frequency=sum(.SD$Frequency), 568 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
556 normalized_read_count=sum(.SD$normalized_read_count), 569 Frequency=sum(.SD$Frequency),
557 Log10_Frequency=sum(.SD$Log10_Frequency), 570 normalized_read_count=sum(.SD$normalized_read_count),
558 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")]) 571 Log10_Frequency=sum(.SD$Log10_Frequency),
559 572 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
560 573
561 interval = intervalReads 574
562 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) 575 interval = intervalReads
563 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) 576 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
564 577 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
565 one = triplets[triplets$Sample == "14696_reg_BM",] 578
566 two = triplets[triplets$Sample == "24536_reg_BM",] 579 one = triplets[triplets$Sample == "14696_reg_BM",]
567 three = triplets[triplets$Sample == "24062_reg_BM",] 580 two = triplets[triplets$Sample == "24536_reg_BM",]
568 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T) 581 three = triplets[triplets$Sample == "24062_reg_BM",]
569 582 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
570 one = triplets[triplets$Sample == "16278_Left",] 583
571 two = triplets[triplets$Sample == "26402_Left",] 584 one = triplets[triplets$Sample == "16278_Left",]
572 three = triplets[triplets$Sample == "26759_Left",] 585 two = triplets[triplets$Sample == "26402_Left",]
573 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T) 586 three = triplets[triplets$Sample == "26759_Left",]
574 587 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
575 one = triplets[triplets$Sample == "16278_Right",] 588
576 two = triplets[triplets$Sample == "26402_Right",] 589 one = triplets[triplets$Sample == "16278_Right",]
577 three = triplets[triplets$Sample == "26759_Right",] 590 two = triplets[triplets$Sample == "26402_Right",]
578 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T) 591 three = triplets[triplets$Sample == "26759_Right",]
579 592 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
580 593
581 interval = intervalFreq 594
582 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) 595 interval = intervalFreq
583 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) 596 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
584 597 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
585 one = triplets[triplets$Sample == "14696_reg_BM",] 598
586 two = triplets[triplets$Sample == "24536_reg_BM",] 599 one = triplets[triplets$Sample == "14696_reg_BM",]
587 three = triplets[triplets$Sample == "24062_reg_BM",] 600 two = triplets[triplets$Sample == "24536_reg_BM",]
588 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F) 601 three = triplets[triplets$Sample == "24062_reg_BM",]
589 602 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
590 one = triplets[triplets$Sample == "16278_Left",] 603
591 two = triplets[triplets$Sample == "26402_Left",] 604 one = triplets[triplets$Sample == "16278_Left",]
592 three = triplets[triplets$Sample == "26759_Left",] 605 two = triplets[triplets$Sample == "26402_Left",]
593 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F) 606 three = triplets[triplets$Sample == "26759_Left",]
594 607 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
595 one = triplets[triplets$Sample == "16278_Right",] 608
596 two = triplets[triplets$Sample == "26402_Right",] 609 one = triplets[triplets$Sample == "16278_Right",]
597 three = triplets[triplets$Sample == "26759_Right",] 610 two = triplets[triplets$Sample == "26402_Right",]
598 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F) 611 three = triplets[triplets$Sample == "26759_Right",]
612 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)
613 } else {
614 cat("", file="triplets.txt")
615 }