Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 9:58a28427930e draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 30 Sep 2014 10:06:57 -0400 |
parents | fa240d1c57a9 |
children | 974febc99fd4 |
comparison
equal
deleted
inserted
replaced
8:fa240d1c57a9 | 9:58a28427930e |
---|---|
13 library(data.table) | 13 library(data.table) |
14 library(grid) | 14 library(grid) |
15 library(parallel) | 15 library(parallel) |
16 #require(xtable) | 16 #require(xtable) |
17 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T) | 17 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T) |
18 dat = read.csv(inFile, sep="\t") | 18 dat = read.table(inFile, header=T, sep="\t", dec=",", fill=T, stringsAsFactors=F) |
19 #dat = data.frame(fread(inFile)) #faster but with a dep | 19 dat = dat[!is.na(dat$Patient),] |
20 | |
20 setwd(outDir) | 21 setwd(outDir) |
21 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T) | 22 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T) |
22 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1))) | 23 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1))) |
23 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1))) | 24 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1))) |
24 | 25 |
26 str(dat) | |
27 cat("<tr><td>Deduplication</td></tr>", file=logfile, append=T) | |
28 dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")]) | |
29 | |
25 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T) | 30 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T) |
26 dat$Frequency = ((10^dat$Log10_Frequency)*100) | 31 dat$Frequency = ((10^dat$Log10_Frequency)*100) |
27 | 32 |
28 dat = dat[dat$Frequency >= min_freq,] | 33 dat = dat[dat$Frequency >= min_freq,] |
29 | 34 |
30 cat("<tr><td>Normalizing cell count to 1.000.000</td></tr>", file=logfile, append=T) | 35 cat("<tr><td>Normalizing cell count to 1.000.000</td></tr>", file=logfile, append=T) |
31 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * 1000000 / 2) | 36 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * 1000000 / 2) |
32 dat = dat[dat$normalized_read_count >= min_cells,] | 37 dat = dat[dat$normalized_read_count >= min_cells,] |
33 dat$paste = paste(dat$Sample, dat$V_Segment_Major_Gene, dat$J_Segment_Major_Gene, dat$CDR3_Sense_Sequence) | 38 dat$paste = paste(dat$Sample, dat$V_Segment_Major_Gene, dat$J_Segment_Major_Gene, dat$CDR3_Sense_Sequence) |
34 cat("<tr><td>Removing duplicates</td></tr>", file=logfile, append=T) | 39 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),] |
35 dat = dat[!duplicated(dat$paste),] | 40 |
36 patients = split(dat, dat$Patient, drop=T) | 41 patients = split(dat, dat$Patient, drop=T) |
37 intervalReads = rev(c(0,10,25,50,100,1000,10000)) | 42 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000)) |
38 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5)) | 43 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5)) |
39 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV") | 44 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV") |
40 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*") | 45 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*") |
41 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb") | 46 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb") |
42 Titles = factor(Titles, levels=Titles) | 47 Titles = factor(Titles, levels=Titles) |
80 } | 85 } |
81 if(appendtxt){ | 86 if(appendtxt){ |
82 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3) | 87 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3) |
83 } | 88 } |
84 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T) | 89 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T) |
85 patientMerge = merge(patient1, patient2, by="Clone_Sequence") | 90 |
91 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) | |
92 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) | |
93 | |
94 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") | |
86 res1 = vector() | 95 res1 = vector() |
87 res2 = vector() | 96 res2 = vector() |
88 resBoth = vector() | 97 resBoth = vector() |
89 read1Count = vector() | 98 read1Count = vector() |
90 read2Count = vector() | 99 read2Count = vector() |
91 locussum1 = vector() | 100 locussum1 = vector() |
92 locussum2 = vector() | 101 locussum2 = vector() |
102 | |
103 print(patient) | |
93 #for(iter in 1){ | 104 #for(iter in 1){ |
94 for(iter in 1:length(product[,1])){ | 105 for(iter in 1:length(product[,1])){ |
95 threshhold = product[iter,threshholdIndex] | 106 threshhold = product[iter,threshholdIndex] |
96 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") | 107 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") |
97 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") | 108 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") |
98 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold) | 109 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold) |
99 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence)) | 110 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$CDR3_Sense_Sequence %in% patientMerge[both,]$CDR3_Sense_Sequence)) |
100 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence)) | 111 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$CDR3_Sense_Sequence %in% patientMerge[both,]$CDR3_Sense_Sequence)) |
101 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[both,]$normalized_read_count.x)) | 112 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[both,]$normalized_read_count.x)) |
102 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[both,]$normalized_read_count.y)) | 113 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[both,]$normalized_read_count.y)) |
103 res1 = append(res1, sum(one)) | 114 res1 = append(res1, sum(one)) |
104 res2 = append(res2, sum(two)) | 115 res2 = append(res2, sum(two)) |
105 resBoth = append(resBoth, sum(both)) | 116 resBoth = append(resBoth, sum(both)) |
106 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count)) | 117 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count)) |
107 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count)) | 118 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count)) |
108 #threshhold = 0 | 119 #threshhold = 0 |
109 if(threshhold != 0){ | 120 if(threshhold != 0){ |
110 if(sum(one) > 0){ | 121 if(sum(one) > 0){ |
111 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 122 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] |
112 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") | 123 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "CDR3 Sequence", "Related_to_leukemia_clone") |
113 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="") | 124 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="") |
114 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 125 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
115 } | 126 } |
116 if(sum(two) > 0){ | 127 if(sum(two) > 0){ |
117 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 128 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] |
118 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") | 129 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "CDR3 Sequence", "Related_to_leukemia_clone") |
119 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") | 130 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") |
120 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 131 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
121 } | 132 } |
122 } | 133 } |
123 if(sum(both) > 0){ | 134 if(sum(both) > 0){ |
124 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] | 135 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "CDR3_Sense_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] |
125 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) | 136 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"CDR3 Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) |
126 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") | 137 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") |
127 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 138 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
128 } | 139 } |
129 } | 140 } |
130 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2) | 141 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2) |
186 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) | 197 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) |
187 | 198 |
188 interval = intervalReads | 199 interval = intervalReads |
189 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 200 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
190 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 201 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
191 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Clone_Molecule_Count_From_Spikes") | 202 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") |
192 | 203 |
193 cat("</table></html>", file=logfile, append=T) | 204 cat("</table></html>", file=logfile, append=T) |
194 | 205 |
195 | 206 |
196 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ | 207 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ |
208 patientIndex = which(colnames(patient1) == "Patient") | 219 patientIndex = which(colnames(patient1) == "Patient") |
209 oneSample = paste(patient1[1,sampleIndex], sep="") | 220 oneSample = paste(patient1[1,sampleIndex], sep="") |
210 twoSample = paste(patient2[1,sampleIndex], sep="") | 221 twoSample = paste(patient2[1,sampleIndex], sep="") |
211 threeSample = paste(patient3[1,sampleIndex], sep="") | 222 threeSample = paste(patient3[1,sampleIndex], sep="") |
212 | 223 |
213 patientMerge = merge(patient1, patient2, by="Clone_Sequence") | 224 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) |
214 patientMerge = merge(patientMerge, patient3, by="Clone_Sequence") | 225 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) |
215 colnames(patientMerge)[32:length(colnames(patientMerge))] = paste(colnames(patientMerge)[32:length(colnames(patientMerge))], ".z", sep="") | 226 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence) |
227 | |
228 patientMerge = merge(patient1, patient2, by="merge") | |
229 patientMerge = merge(patientMerge, patient3, by="merge") | |
230 colnames(patientMerge)[28:length(colnames(patientMerge))] = paste(colnames(patientMerge)[28:length(colnames(patientMerge))], ".z", sep="") | |
216 res1 = vector() | 231 res1 = vector() |
217 res2 = vector() | 232 res2 = vector() |
218 res3 = vector() | 233 res3 = vector() |
219 resAll = vector() | 234 resAll = vector() |
220 read1Count = vector() | 235 read1Count = vector() |
221 read2Count = vector() | 236 read2Count = vector() |
222 read3Count = vector() | 237 read3Count = vector() |
223 | 238 |
224 if(appendTriplets){ | 239 if(appendTriplets){ |
225 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3) | 240 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3) |
226 } | 241 } |
227 for(iter in 1:length(product[,1])){ | 242 for(iter in 1:length(product[,1])){ |
228 threshhold = product[iter,threshholdIndex] | 243 threshhold = product[iter,threshholdIndex] |
229 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") | 244 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") |
230 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") | 245 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") |
231 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold & patientMerge[,paste(on, ".z", sep="")] > threshhold) | 246 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold & patientMerge[,paste(on, ".z", sep="")] > threshhold) |
232 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence)) | 247 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence)) |
233 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence)) | 248 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence)) |
234 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence)) | 249 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence)) |
235 | 250 |
236 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x)) | 251 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x)) |
237 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y)) | 252 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y)) |
238 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z)) | 253 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z)) |
239 res1 = append(res1, sum(one)) | 254 res1 = append(res1, sum(one)) |
241 res3 = append(res3, sum(three)) | 256 res3 = append(res3, sum(three)) |
242 resAll = append(resAll, sum(all)) | 257 resAll = append(resAll, sum(all)) |
243 #threshhold = 0 | 258 #threshhold = 0 |
244 if(threshhold != 0){ | 259 if(threshhold != 0){ |
245 if(sum(one) > 0){ | 260 if(sum(one) > 0){ |
246 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 261 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] |
247 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") | 262 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") |
248 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="") | 263 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="") |
249 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 264 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
250 } | 265 } |
251 if(sum(two) > 0){ | 266 if(sum(two) > 0){ |
252 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 267 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] |
253 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") | 268 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") |
254 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="") | 269 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="") |
255 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 270 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
256 } | 271 } |
257 if(sum(three) > 0){ | 272 if(sum(three) > 0){ |
258 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 273 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] |
259 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") | 274 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") |
260 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="") | 275 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="") |
261 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 276 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
262 } | 277 } |
263 } | 278 } |
264 if(sum(all) > 0){ | 279 if(sum(all) > 0){ |
265 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] | 280 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "CDR3_Sense_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] |
266 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) | 281 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"CDR3_Sense_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) |
267 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="") | 282 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="") |
268 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 283 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
269 } | 284 } |
270 } | 285 } |
271 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count)) | 286 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count)) |
308 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080) | 323 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080) |
309 print(plt) | 324 print(plt) |
310 dev.off() | 325 dev.off() |
311 } | 326 } |
312 | 327 |
328 | |
329 triplets$uniqueID = "ID" | |
330 | |
331 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | |
332 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | |
333 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | |
334 | |
335 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | |
336 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | |
337 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | |
338 | |
339 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696" | |
340 | |
341 triplets = data.frame(data.table(triplets)[, list(Patient=unique(.SD$uniqueID), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")]) | |
342 | |
343 triplets$Frequency = (10^as.numeric(triplets$Log10_Frequency))*100 | |
344 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * 1000000 / 2) | |
345 | |
313 interval = intervalReads | 346 interval = intervalReads |
314 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 347 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
315 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 348 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
316 | 349 |
317 one = dat[dat$Patient == "VanDongen_cALL_14696.1",] | 350 one = triplets[triplets$Sample == "14696_reg_BM",] |
318 two = dat[dat$Patient == "VanDongen_cALL_14696.2",] | 351 two = triplets[triplets$Sample == "24536_reg_BM",] |
319 three = dat[dat$Patient == "VanDongen_cALL_14696.3",] | 352 three = triplets[triplets$Sample == "24062_reg_BM",] |
320 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T) | 353 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T) |
321 | 354 |
322 one = dat[dat$Sample == "16278_Left",] | 355 one = triplets[triplets$Sample == "16278_Left",] |
323 two = dat[dat$Sample == "26402_Left",] | 356 two = triplets[triplets$Sample == "26402_Left",] |
324 three = dat[dat$Sample == "26759_Left",] | 357 three = triplets[triplets$Sample == "26759_Left",] |
325 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T) | 358 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T) |
326 | 359 |
327 one = dat[dat$Sample == "16278_Right",] | 360 one = triplets[triplets$Sample == "16278_Right",] |
328 two = dat[dat$Sample == "26402_Right",] | 361 two = triplets[triplets$Sample == "26402_Right",] |
329 three = dat[dat$Sample == "26759_Right",] | 362 three = triplets[triplets$Sample == "26759_Right",] |
330 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T) | 363 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T) |
331 | 364 |
332 | 365 |
333 interval = intervalFreq | 366 interval = intervalFreq |
334 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 367 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
335 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 368 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
336 | 369 |
337 one = dat[dat$Patient == "VanDongen_cALL_14696.1",] | 370 one = triplets[triplets$Sample == "14696_reg_BM",] |
338 two = dat[dat$Patient == "VanDongen_cALL_14696.2",] | 371 two = triplets[triplets$Sample == "24536_reg_BM",] |
339 three = dat[dat$Patient == "VanDongen_cALL_14696.3",] | 372 three = triplets[triplets$Sample == "24062_reg_BM",] |
340 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F) | 373 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F) |
341 | 374 |
342 one = dat[dat$Sample == "16278_Left",] | 375 one = triplets[triplets$Sample == "16278_Left",] |
343 two = dat[dat$Sample == "26402_Left",] | 376 two = triplets[triplets$Sample == "26402_Left",] |
344 three = dat[dat$Sample == "26759_Left",] | 377 three = triplets[triplets$Sample == "26759_Left",] |
345 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F) | 378 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F) |
346 | 379 |
347 one = dat[dat$Sample == "16278_Right",] | 380 one = triplets[triplets$Sample == "16278_Right",] |
348 two = dat[dat$Sample == "26402_Right",] | 381 two = triplets[triplets$Sample == "26402_Right",] |
349 three = dat[dat$Sample == "26759_Right",] | 382 three = triplets[triplets$Sample == "26759_Right",] |
350 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F) | 383 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F) |
351 | |
352 | |
353 |