clonal_sequences_in_paired_samples: RScript.r comparison

comparison RScript.r @ 33:642b4593f0a4 draft

Uploaded

author	davidvanzessen
date	Fri, 24 Jul 2015 05:33:02 -0400
parents	dde5ec847549
children	37d9074ef2c6

comparison

equal deleted inserted replaced

-:dde5ec847549
+:642b4593f0a4
 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
 cat("</table></html>", file=logfile, append=T)
-scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
-p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
+if(nrow(single_patients) > 0){
-p = p + geom_point(aes(colour=type), position="jitter")
+	scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
-p = p + xlab("In one or both samples") + ylab("Reads")
+	p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
-p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
+	p = p + geom_point(aes(colour=type), position="jitter")
-png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080)
+	p = p + xlab("In one or both samples") + ylab("Reads")
-print(p)
+	p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
-dev.off()
+	png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
+	print(p)
-p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
+	dev.off()
-p = p + geom_point(aes(colour=type), position="jitter")
-p = p + xlab("In one or both samples") + ylab("Frequency")
+	p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
-p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
+	p = p + geom_point(aes(colour=type), position="jitter")
-png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080)
+	p = p + xlab("In one or both samples") + ylab("Frequency")
-print(p)
+	p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
-dev.off()
+	png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
+	print(p)
+	dev.off()
+} else {
+	empty <- data.frame()
+	p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
+	png("singles_reads_scatterplot.png", width=400, height=300)
+	print(p)
+	dev.off()
+	png("singles_freq_scatterplot.png", width=400, height=300)
+	print(p)
+	dev.off()
+}
 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
 onShort = "reads"
 if(on == "Frequency"){
 onShort = "freq"
 }
 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
 print(plt)
 dev.off()
 }
-triplets$uniqueID = "ID"
+if(nrow(triplets) != 0){
+triplets$uniqueID = "ID"
-triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
-triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
+triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
-triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
+triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
+triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
-triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
-triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
+triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
-triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
+triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
+triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
-triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
+triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
-triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
-triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
+triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
-min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
+triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
+min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
-triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
-min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
+triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
+min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
-min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
+min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
-triplets = merge(triplets, min_cell_count, by="min_cell_paste")
+triplets = merge(triplets, min_cell_count, by="min_cell_paste")
-triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
+triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
-triplets = triplets[triplets$normalized_read_count >= min_cells,]
+triplets = triplets[triplets$normalized_read_count >= min_cells,]
-column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
+column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
-triplets = triplets[,!(colnames(triplets) %in% column_drops)]
+triplets = triplets[,!(colnames(triplets) %in% column_drops)]
-#remove duplicate V+J+CDR3, add together numerical values
-triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor),
+#remove duplicate V+J+CDR3, add together numerical values
-Cell_Count=unique(.SD$Cell_Count),
+triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor),
-Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
+Cell_Count=unique(.SD$Cell_Count),
-Total_Read_Count=sum(.SD$Total_Read_Count),
+Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
-dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
+Total_Read_Count=sum(.SD$Total_Read_Count),
-Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
+dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
-Frequency=sum(.SD$Frequency),
+Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
-normalized_read_count=sum(.SD$normalized_read_count),
+Frequency=sum(.SD$Frequency),
-Log10_Frequency=sum(.SD$Log10_Frequency),
+normalized_read_count=sum(.SD$normalized_read_count),
-Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
+Log10_Frequency=sum(.SD$Log10_Frequency),
+Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
-interval = intervalReads
-intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
+interval = intervalReads
-product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
+intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
+product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
-one = triplets[triplets$Sample == "14696_reg_BM",]
-two = triplets[triplets$Sample == "24536_reg_BM",]
+one = triplets[triplets$Sample == "14696_reg_BM",]
-three = triplets[triplets$Sample == "24062_reg_BM",]
+two = triplets[triplets$Sample == "24536_reg_BM",]
-tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
+three = triplets[triplets$Sample == "24062_reg_BM",]
+tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
-one = triplets[triplets$Sample == "16278_Left",]
-two = triplets[triplets$Sample == "26402_Left",]
+one = triplets[triplets$Sample == "16278_Left",]
-three = triplets[triplets$Sample == "26759_Left",]
+two = triplets[triplets$Sample == "26402_Left",]
-tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
+three = triplets[triplets$Sample == "26759_Left",]
+tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
-one = triplets[triplets$Sample == "16278_Right",]
-two = triplets[triplets$Sample == "26402_Right",]
+one = triplets[triplets$Sample == "16278_Right",]
-three = triplets[triplets$Sample == "26759_Right",]
+two = triplets[triplets$Sample == "26402_Right",]
-tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
+three = triplets[triplets$Sample == "26759_Right",]
+tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
-interval = intervalFreq
-intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
+interval = intervalFreq
-product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
+intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
+product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
-one = triplets[triplets$Sample == "14696_reg_BM",]
-two = triplets[triplets$Sample == "24536_reg_BM",]
+one = triplets[triplets$Sample == "14696_reg_BM",]
-three = triplets[triplets$Sample == "24062_reg_BM",]
+two = triplets[triplets$Sample == "24536_reg_BM",]
-tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
+three = triplets[triplets$Sample == "24062_reg_BM",]
+tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
-one = triplets[triplets$Sample == "16278_Left",]
-two = triplets[triplets$Sample == "26402_Left",]
+one = triplets[triplets$Sample == "16278_Left",]
-three = triplets[triplets$Sample == "26759_Left",]
+two = triplets[triplets$Sample == "26402_Left",]
-tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
+three = triplets[triplets$Sample == "26759_Left",]
+tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
-one = triplets[triplets$Sample == "16278_Right",]
-two = triplets[triplets$Sample == "26402_Right",]
+one = triplets[triplets$Sample == "16278_Right",]
-three = triplets[triplets$Sample == "26759_Right",]
+two = triplets[triplets$Sample == "26402_Right",]
-tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)
+three = triplets[triplets$Sample == "26759_Right",]
+tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)
+} else {
+cat("", file="triplets.txt")
+}

Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples

comparison RScript.r @ 33:642b4593f0a4 draft