comparison RScript.r @ 18:f23d3be6fbc8 draft

Uploaded
author davidvanzessen
date Thu, 19 Feb 2015 09:52:28 -0500
parents 7a9debac9a97
children 5f7ed60975bd
comparison
equal deleted inserted replaced
17:7a9debac9a97 18:f23d3be6fbc8
64 x$Sample = factor(x$Sample, levels=unique(x$Sample)) 64 x$Sample = factor(x$Sample, levels=unique(x$Sample))
65 onShort = "reads" 65 onShort = "reads"
66 if(on == "Frequency"){ 66 if(on == "Frequency"){
67 onShort = "freq" 67 onShort = "freq"
68 } 68 }
69 onx = paste(on, ".x", sep="")
70 ony = paste(on, ".y", sep="")
69 splt = split(x, x$Sample, drop=T) 71 splt = split(x, x$Sample, drop=T)
70 type="pair" 72 type="pair"
71 if(length(splt) == 1){ 73 if(length(splt) == 1){
72 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample")) 74 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
73 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0)) 75 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
106 patient1$merge = paste(patient1$Clone_Sequence) 108 patient1$merge = paste(patient1$Clone_Sequence)
107 patient2$merge = paste(patient2$Clone_Sequence) 109 patient2$merge = paste(patient2$Clone_Sequence)
108 110
109 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") 111 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
110 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") 112 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
113 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
111 res1 = vector() 114 res1 = vector()
112 res2 = vector() 115 res2 = vector()
113 resBoth = vector() 116 resBoth = vector()
114 read1Count = vector() 117 read1Count = vector()
115 read2Count = vector() 118 read2Count = vector()
120 #for(iter in 1){ 123 #for(iter in 1){
121 for(iter in 1:length(product[,1])){ 124 for(iter in 1:length(product[,1])){
122 threshhold = product[iter,threshholdIndex] 125 threshhold = product[iter,threshholdIndex]
123 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") 126 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
124 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") 127 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
125 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold) 128 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
126 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x)) 129 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both higher than threshold
127 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x)) 130 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence))
131 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence))
128 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) 132 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
129 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) 133 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
130 res1 = append(res1, sum(one)) 134 res1 = append(res1, sum(one))
131 res2 = append(res2, sum(two)) 135 res2 = append(res2, sum(two))
132 resBoth = append(resBoth, sum(both)) 136 resBoth = append(resBoth, sum(both))
224 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ 228 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
225 onShort = "reads" 229 onShort = "reads"
226 if(on == "Frequency"){ 230 if(on == "Frequency"){
227 onShort = "freq" 231 onShort = "freq"
228 } 232 }
233 onx = paste(on, ".x", sep="")
234 ony = paste(on, ".y", sep="")
235 onz = paste(on, ".z", sep="")
229 type="triplet" 236 type="triplet"
230 237
231 threshholdIndex = which(colnames(product) == "interval") 238 threshholdIndex = which(colnames(product) == "interval")
232 V_SegmentIndex = which(colnames(product) == "V_Segments") 239 V_SegmentIndex = which(colnames(product) == "V_Segments")
233 J_SegmentIndex = which(colnames(product) == "J_Segments") 240 J_SegmentIndex = which(colnames(product) == "J_Segments")
247 patient3$merge = paste(patient3$Clone_Sequence) 254 patient3$merge = paste(patient3$Clone_Sequence)
248 255
249 patientMerge = merge(patient1, patient2, by="merge") 256 patientMerge = merge(patient1, patient2, by="merge")
250 patientMerge = merge(patientMerge, patient3, by="merge") 257 patientMerge = merge(patientMerge, patient3, by="merge")
251 colnames(patientMerge)[30:length(colnames(patientMerge))] = paste(colnames(patientMerge)[30:length(colnames(patientMerge))], ".z", sep="") 258 colnames(patientMerge)[30:length(colnames(patientMerge))] = paste(colnames(patientMerge)[30:length(colnames(patientMerge))], ".z", sep="")
259 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
252 res1 = vector() 260 res1 = vector()
253 res2 = vector() 261 res2 = vector()
254 res3 = vector() 262 res3 = vector()
255 resAll = vector() 263 resAll = vector()
256 read1Count = vector() 264 read1Count = vector()
262 } 270 }
263 for(iter in 1:length(product[,1])){ 271 for(iter in 1:length(product[,1])){
264 threshhold = product[iter,threshholdIndex] 272 threshhold = product[iter,threshholdIndex]
265 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") 273 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
266 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") 274 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
267 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold & patientMerge[,paste(on, ".z", sep="")] > threshhold) 275 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
268 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)) 276 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
269 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)) 277 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x))
270 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)) 278 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x))
279 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x))
271 280
272 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) 281 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
273 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) 282 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
274 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count)) 283 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
275 res1 = append(res1, sum(one)) 284 res1 = append(res1, sum(one))
276 res2 = append(res2, sum(two)) 285 res2 = append(res2, sum(two))
277 res3 = append(res3, sum(three)) 286 res3 = append(res3, sum(three))
278 resAll = append(resAll, sum(all)) 287 resAll = append(resAll, sum(all))
279 #threshhold = 0 288 #threshhold = 0
280 if(threshhold != 0){ 289 if(threshhold != 0){
281 if(sum(one) > 0){ 290 if(sum(one) > 0){
282 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] 291 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
283 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") 292 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
284 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="") 293 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
285 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 294 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
286 } 295 }
287 if(sum(two) > 0){ 296 if(sum(two) > 0){
288 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] 297 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
289 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") 298 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
290 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="") 299 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
291 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 300 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
292 } 301 }
293 if(sum(three) > 0){ 302 if(sum(three) > 0){
294 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] 303 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
295 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") 304 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
296 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="") 305 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
297 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 306 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
298 } 307 }
299 } 308 }
300 if(sum(all) > 0){ 309 if(sum(all) > 0){
301 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] 310 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "CDR3_Sense_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
302 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) 311 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"CDR3_Sense_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
303 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="") 312 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
304 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 313 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
305 } 314 }
306 } 315 }
307 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count)) 316 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))