Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 18:f23d3be6fbc8 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 19 Feb 2015 09:52:28 -0500 |
parents | 7a9debac9a97 |
children | 5f7ed60975bd |
comparison
equal
deleted
inserted
replaced
17:7a9debac9a97 | 18:f23d3be6fbc8 |
---|---|
64 x$Sample = factor(x$Sample, levels=unique(x$Sample)) | 64 x$Sample = factor(x$Sample, levels=unique(x$Sample)) |
65 onShort = "reads" | 65 onShort = "reads" |
66 if(on == "Frequency"){ | 66 if(on == "Frequency"){ |
67 onShort = "freq" | 67 onShort = "freq" |
68 } | 68 } |
69 onx = paste(on, ".x", sep="") | |
70 ony = paste(on, ".y", sep="") | |
69 splt = split(x, x$Sample, drop=T) | 71 splt = split(x, x$Sample, drop=T) |
70 type="pair" | 72 type="pair" |
71 if(length(splt) == 1){ | 73 if(length(splt) == 1){ |
72 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample")) | 74 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample")) |
73 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0)) | 75 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0)) |
106 patient1$merge = paste(patient1$Clone_Sequence) | 108 patient1$merge = paste(patient1$Clone_Sequence) |
107 patient2$merge = paste(patient2$Clone_Sequence) | 109 patient2$merge = paste(patient2$Clone_Sequence) |
108 | 110 |
109 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") | 111 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") |
110 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") | 112 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") |
113 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) | |
111 res1 = vector() | 114 res1 = vector() |
112 res2 = vector() | 115 res2 = vector() |
113 resBoth = vector() | 116 resBoth = vector() |
114 read1Count = vector() | 117 read1Count = vector() |
115 read2Count = vector() | 118 read2Count = vector() |
120 #for(iter in 1){ | 123 #for(iter in 1){ |
121 for(iter in 1:length(product[,1])){ | 124 for(iter in 1:length(product[,1])){ |
122 threshhold = product[iter,threshholdIndex] | 125 threshhold = product[iter,threshholdIndex] |
123 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") | 126 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") |
124 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") | 127 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") |
125 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold) | 128 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold |
126 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x)) | 129 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both higher than threshold |
127 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x)) | 130 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence)) |
131 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence)) | |
128 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) | 132 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) |
129 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) | 133 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) |
130 res1 = append(res1, sum(one)) | 134 res1 = append(res1, sum(one)) |
131 res2 = append(res2, sum(two)) | 135 res2 = append(res2, sum(two)) |
132 resBoth = append(resBoth, sum(both)) | 136 resBoth = append(resBoth, sum(both)) |
224 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ | 228 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ |
225 onShort = "reads" | 229 onShort = "reads" |
226 if(on == "Frequency"){ | 230 if(on == "Frequency"){ |
227 onShort = "freq" | 231 onShort = "freq" |
228 } | 232 } |
233 onx = paste(on, ".x", sep="") | |
234 ony = paste(on, ".y", sep="") | |
235 onz = paste(on, ".z", sep="") | |
229 type="triplet" | 236 type="triplet" |
230 | 237 |
231 threshholdIndex = which(colnames(product) == "interval") | 238 threshholdIndex = which(colnames(product) == "interval") |
232 V_SegmentIndex = which(colnames(product) == "V_Segments") | 239 V_SegmentIndex = which(colnames(product) == "V_Segments") |
233 J_SegmentIndex = which(colnames(product) == "J_Segments") | 240 J_SegmentIndex = which(colnames(product) == "J_Segments") |
247 patient3$merge = paste(patient3$Clone_Sequence) | 254 patient3$merge = paste(patient3$Clone_Sequence) |
248 | 255 |
249 patientMerge = merge(patient1, patient2, by="merge") | 256 patientMerge = merge(patient1, patient2, by="merge") |
250 patientMerge = merge(patientMerge, patient3, by="merge") | 257 patientMerge = merge(patientMerge, patient3, by="merge") |
251 colnames(patientMerge)[30:length(colnames(patientMerge))] = paste(colnames(patientMerge)[30:length(colnames(patientMerge))], ".z", sep="") | 258 colnames(patientMerge)[30:length(colnames(patientMerge))] = paste(colnames(patientMerge)[30:length(colnames(patientMerge))], ".z", sep="") |
259 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) | |
252 res1 = vector() | 260 res1 = vector() |
253 res2 = vector() | 261 res2 = vector() |
254 res3 = vector() | 262 res3 = vector() |
255 resAll = vector() | 263 resAll = vector() |
256 read1Count = vector() | 264 read1Count = vector() |
262 } | 270 } |
263 for(iter in 1:length(product[,1])){ | 271 for(iter in 1:length(product[,1])){ |
264 threshhold = product[iter,threshholdIndex] | 272 threshhold = product[iter,threshholdIndex] |
265 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") | 273 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") |
266 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") | 274 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") |
267 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold & patientMerge[,paste(on, ".z", sep="")] > threshhold) | 275 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold) |
268 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)) | 276 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) |
269 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)) | 277 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x)) |
270 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)) | 278 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x)) |
279 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x)) | |
271 | 280 |
272 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) | 281 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x)) |
273 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) | 282 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y)) |
274 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count)) | 283 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z)) |
275 res1 = append(res1, sum(one)) | 284 res1 = append(res1, sum(one)) |
276 res2 = append(res2, sum(two)) | 285 res2 = append(res2, sum(two)) |
277 res3 = append(res3, sum(three)) | 286 res3 = append(res3, sum(three)) |
278 resAll = append(resAll, sum(all)) | 287 resAll = append(resAll, sum(all)) |
279 #threshhold = 0 | 288 #threshhold = 0 |
280 if(threshhold != 0){ | 289 if(threshhold != 0){ |
281 if(sum(one) > 0){ | 290 if(sum(one) > 0){ |
282 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 291 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] |
283 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") | 292 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") |
284 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="") | 293 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="") |
285 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 294 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
286 } | 295 } |
287 if(sum(two) > 0){ | 296 if(sum(two) > 0){ |
288 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 297 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] |
289 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") | 298 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") |
290 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="") | 299 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="") |
291 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 300 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
292 } | 301 } |
293 if(sum(three) > 0){ | 302 if(sum(three) > 0){ |
294 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 303 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] |
295 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") | 304 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") |
296 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="") | 305 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="") |
297 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 306 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
298 } | 307 } |
299 } | 308 } |
300 if(sum(all) > 0){ | 309 if(sum(all) > 0){ |
301 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] | 310 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "CDR3_Sense_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] |
302 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) | 311 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"CDR3_Sense_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) |
303 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="") | 312 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="") |
304 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 313 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
305 } | 314 } |
306 } | 315 } |
307 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count)) | 316 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count)) |