Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
changeset 33:642b4593f0a4 draft
Uploaded
| author | davidvanzessen | 
|---|---|
| date | Fri, 24 Jul 2015 05:33:02 -0400 | 
| parents | dde5ec847549 | 
| children | 37d9074ef2c6 | 
| files | RScript.r | 
| diffstat | 1 files changed, 115 insertions(+), 98 deletions(-) [+] | 
line wrap: on
 line diff
--- a/RScript.r Tue Jun 02 05:33:58 2015 -0400 +++ b/RScript.r Fri Jul 24 05:33:02 2015 -0400 @@ -288,23 +288,36 @@ cat("</table></html>", file=logfile, append=T) -scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) -p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000)) -p = p + geom_point(aes(colour=type), position="jitter") -p = p + xlab("In one or both samples") + ylab("Reads") -p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample") -png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080) -print(p) -dev.off() + +if(nrow(single_patients) > 0){ + scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) + p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000)) + p = p + geom_point(aes(colour=type), position="jitter") + p = p + xlab("In one or both samples") + ylab("Reads") + p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample") + png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080) + print(p) + dev.off() -p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) -p = p + geom_point(aes(colour=type), position="jitter") -p = p + xlab("In one or both samples") + ylab("Frequency") -p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample") -png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080) -print(p) -dev.off() - + p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) + p = p + geom_point(aes(colour=type), position="jitter") + p = p + xlab("In one or both samples") + ylab("Frequency") + p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample") + png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080) + print(p) + dev.off() +} else { + empty <- data.frame() + p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample") + + png("singles_reads_scatterplot.png", width=400, height=300) + print(p) + dev.off() + + png("singles_freq_scatterplot.png", width=400, height=300) + print(p) + dev.off() +} tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ onShort = "reads" if(on == "Frequency"){ @@ -514,85 +527,89 @@ dev.off() } -triplets$uniqueID = "ID" - -triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" -triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" -triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" - -triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" -triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" -triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" - -triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696" - -triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4) -triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4) -min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")]) - -triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J) -min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J) - -min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")] - -triplets = merge(triplets, min_cell_count, by="min_cell_paste") - -triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2 - -triplets = triplets[triplets$normalized_read_count >= min_cells,] - -column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste") - -triplets = triplets[,!(colnames(triplets) %in% column_drops)] - -#remove duplicate V+J+CDR3, add together numerical values -triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor), - Cell_Count=unique(.SD$Cell_Count), - Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), - Total_Read_Count=sum(.SD$Total_Read_Count), - dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0), - Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone), - Frequency=sum(.SD$Frequency), - normalized_read_count=sum(.SD$normalized_read_count), - Log10_Frequency=sum(.SD$Log10_Frequency), - Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")]) - - -interval = intervalReads -intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) -product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) - -one = triplets[triplets$Sample == "14696_reg_BM",] -two = triplets[triplets$Sample == "24536_reg_BM",] -three = triplets[triplets$Sample == "24062_reg_BM",] -tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T) - -one = triplets[triplets$Sample == "16278_Left",] -two = triplets[triplets$Sample == "26402_Left",] -three = triplets[triplets$Sample == "26759_Left",] -tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T) - -one = triplets[triplets$Sample == "16278_Right",] -two = triplets[triplets$Sample == "26402_Right",] -three = triplets[triplets$Sample == "26759_Right",] -tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T) - - -interval = intervalFreq -intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) -product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) - -one = triplets[triplets$Sample == "14696_reg_BM",] -two = triplets[triplets$Sample == "24536_reg_BM",] -three = triplets[triplets$Sample == "24062_reg_BM",] -tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F) - -one = triplets[triplets$Sample == "16278_Left",] -two = triplets[triplets$Sample == "26402_Left",] -three = triplets[triplets$Sample == "26759_Left",] -tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F) - -one = triplets[triplets$Sample == "16278_Right",] -two = triplets[triplets$Sample == "26402_Right",] -three = triplets[triplets$Sample == "26759_Right",] -tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F) +if(nrow(triplets) != 0){ + triplets$uniqueID = "ID" + + triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" + triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" + triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" + + triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" + triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" + triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" + + triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696" + + triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4) + triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4) + min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")]) + + triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J) + min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J) + + min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")] + + triplets = merge(triplets, min_cell_count, by="min_cell_paste") + + triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2 + + triplets = triplets[triplets$normalized_read_count >= min_cells,] + + column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste") + + triplets = triplets[,!(colnames(triplets) %in% column_drops)] + + #remove duplicate V+J+CDR3, add together numerical values + triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor), + Cell_Count=unique(.SD$Cell_Count), + Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), + Total_Read_Count=sum(.SD$Total_Read_Count), + dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0), + Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone), + Frequency=sum(.SD$Frequency), + normalized_read_count=sum(.SD$normalized_read_count), + Log10_Frequency=sum(.SD$Log10_Frequency), + Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")]) + + + interval = intervalReads + intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) + product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) + + one = triplets[triplets$Sample == "14696_reg_BM",] + two = triplets[triplets$Sample == "24536_reg_BM",] + three = triplets[triplets$Sample == "24062_reg_BM",] + tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T) + + one = triplets[triplets$Sample == "16278_Left",] + two = triplets[triplets$Sample == "26402_Left",] + three = triplets[triplets$Sample == "26759_Left",] + tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T) + + one = triplets[triplets$Sample == "16278_Right",] + two = triplets[triplets$Sample == "26402_Right",] + three = triplets[triplets$Sample == "26759_Right",] + tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T) + + + interval = intervalFreq + intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) + product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) + + one = triplets[triplets$Sample == "14696_reg_BM",] + two = triplets[triplets$Sample == "24536_reg_BM",] + three = triplets[triplets$Sample == "24062_reg_BM",] + tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F) + + one = triplets[triplets$Sample == "16278_Left",] + two = triplets[triplets$Sample == "26402_Left",] + three = triplets[triplets$Sample == "26759_Left",] + tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F) + + one = triplets[triplets$Sample == "16278_Right",] + two = triplets[triplets$Sample == "26402_Right",] + three = triplets[triplets$Sample == "26759_Right",] + tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F) +} else { + cat("", file="triplets.txt") +}
