Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 18:f23d3be6fbc8 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Thu, 19 Feb 2015 09:52:28 -0500 |
| parents | 7a9debac9a97 |
| children | 5f7ed60975bd |
comparison
equal
deleted
inserted
replaced
| 17:7a9debac9a97 | 18:f23d3be6fbc8 |
|---|---|
| 64 x$Sample = factor(x$Sample, levels=unique(x$Sample)) | 64 x$Sample = factor(x$Sample, levels=unique(x$Sample)) |
| 65 onShort = "reads" | 65 onShort = "reads" |
| 66 if(on == "Frequency"){ | 66 if(on == "Frequency"){ |
| 67 onShort = "freq" | 67 onShort = "freq" |
| 68 } | 68 } |
| 69 onx = paste(on, ".x", sep="") | |
| 70 ony = paste(on, ".y", sep="") | |
| 69 splt = split(x, x$Sample, drop=T) | 71 splt = split(x, x$Sample, drop=T) |
| 70 type="pair" | 72 type="pair" |
| 71 if(length(splt) == 1){ | 73 if(length(splt) == 1){ |
| 72 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample")) | 74 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample")) |
| 73 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0)) | 75 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0)) |
| 106 patient1$merge = paste(patient1$Clone_Sequence) | 108 patient1$merge = paste(patient1$Clone_Sequence) |
| 107 patient2$merge = paste(patient2$Clone_Sequence) | 109 patient2$merge = paste(patient2$Clone_Sequence) |
| 108 | 110 |
| 109 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") | 111 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") |
| 110 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") | 112 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") |
| 113 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) | |
| 111 res1 = vector() | 114 res1 = vector() |
| 112 res2 = vector() | 115 res2 = vector() |
| 113 resBoth = vector() | 116 resBoth = vector() |
| 114 read1Count = vector() | 117 read1Count = vector() |
| 115 read2Count = vector() | 118 read2Count = vector() |
| 120 #for(iter in 1){ | 123 #for(iter in 1){ |
| 121 for(iter in 1:length(product[,1])){ | 124 for(iter in 1:length(product[,1])){ |
| 122 threshhold = product[iter,threshholdIndex] | 125 threshhold = product[iter,threshholdIndex] |
| 123 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") | 126 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") |
| 124 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") | 127 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") |
| 125 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold) | 128 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold |
| 126 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x)) | 129 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both higher than threshold |
| 127 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x)) | 130 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence)) |
| 131 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence)) | |
| 128 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) | 132 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) |
| 129 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) | 133 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) |
| 130 res1 = append(res1, sum(one)) | 134 res1 = append(res1, sum(one)) |
| 131 res2 = append(res2, sum(two)) | 135 res2 = append(res2, sum(two)) |
| 132 resBoth = append(resBoth, sum(both)) | 136 resBoth = append(resBoth, sum(both)) |
| 224 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ | 228 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ |
| 225 onShort = "reads" | 229 onShort = "reads" |
| 226 if(on == "Frequency"){ | 230 if(on == "Frequency"){ |
| 227 onShort = "freq" | 231 onShort = "freq" |
| 228 } | 232 } |
| 233 onx = paste(on, ".x", sep="") | |
| 234 ony = paste(on, ".y", sep="") | |
| 235 onz = paste(on, ".z", sep="") | |
| 229 type="triplet" | 236 type="triplet" |
| 230 | 237 |
| 231 threshholdIndex = which(colnames(product) == "interval") | 238 threshholdIndex = which(colnames(product) == "interval") |
| 232 V_SegmentIndex = which(colnames(product) == "V_Segments") | 239 V_SegmentIndex = which(colnames(product) == "V_Segments") |
| 233 J_SegmentIndex = which(colnames(product) == "J_Segments") | 240 J_SegmentIndex = which(colnames(product) == "J_Segments") |
| 247 patient3$merge = paste(patient3$Clone_Sequence) | 254 patient3$merge = paste(patient3$Clone_Sequence) |
| 248 | 255 |
| 249 patientMerge = merge(patient1, patient2, by="merge") | 256 patientMerge = merge(patient1, patient2, by="merge") |
| 250 patientMerge = merge(patientMerge, patient3, by="merge") | 257 patientMerge = merge(patientMerge, patient3, by="merge") |
| 251 colnames(patientMerge)[30:length(colnames(patientMerge))] = paste(colnames(patientMerge)[30:length(colnames(patientMerge))], ".z", sep="") | 258 colnames(patientMerge)[30:length(colnames(patientMerge))] = paste(colnames(patientMerge)[30:length(colnames(patientMerge))], ".z", sep="") |
| 259 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) | |
| 252 res1 = vector() | 260 res1 = vector() |
| 253 res2 = vector() | 261 res2 = vector() |
| 254 res3 = vector() | 262 res3 = vector() |
| 255 resAll = vector() | 263 resAll = vector() |
| 256 read1Count = vector() | 264 read1Count = vector() |
| 262 } | 270 } |
| 263 for(iter in 1:length(product[,1])){ | 271 for(iter in 1:length(product[,1])){ |
| 264 threshhold = product[iter,threshholdIndex] | 272 threshhold = product[iter,threshholdIndex] |
| 265 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") | 273 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") |
| 266 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") | 274 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") |
| 267 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold & patientMerge[,paste(on, ".z", sep="")] > threshhold) | 275 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold) |
| 268 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)) | 276 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) |
| 269 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)) | 277 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x)) |
| 270 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)) | 278 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x)) |
| 279 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x)) | |
| 271 | 280 |
| 272 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) | 281 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x)) |
| 273 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) | 282 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y)) |
| 274 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count)) | 283 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z)) |
| 275 res1 = append(res1, sum(one)) | 284 res1 = append(res1, sum(one)) |
| 276 res2 = append(res2, sum(two)) | 285 res2 = append(res2, sum(two)) |
| 277 res3 = append(res3, sum(three)) | 286 res3 = append(res3, sum(three)) |
| 278 resAll = append(resAll, sum(all)) | 287 resAll = append(resAll, sum(all)) |
| 279 #threshhold = 0 | 288 #threshhold = 0 |
| 280 if(threshhold != 0){ | 289 if(threshhold != 0){ |
| 281 if(sum(one) > 0){ | 290 if(sum(one) > 0){ |
| 282 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 291 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] |
| 283 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") | 292 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") |
| 284 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="") | 293 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="") |
| 285 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 294 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
| 286 } | 295 } |
| 287 if(sum(two) > 0){ | 296 if(sum(two) > 0){ |
| 288 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 297 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] |
| 289 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") | 298 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") |
| 290 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="") | 299 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="") |
| 291 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 300 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
| 292 } | 301 } |
| 293 if(sum(three) > 0){ | 302 if(sum(three) > 0){ |
| 294 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 303 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] |
| 295 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") | 304 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") |
| 296 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="") | 305 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="") |
| 297 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 306 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
| 298 } | 307 } |
| 299 } | 308 } |
| 300 if(sum(all) > 0){ | 309 if(sum(all) > 0){ |
| 301 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] | 310 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "CDR3_Sense_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] |
| 302 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) | 311 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"CDR3_Sense_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) |
| 303 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="") | 312 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="") |
| 304 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 313 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
| 305 } | 314 } |
| 306 } | 315 } |
| 307 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count)) | 316 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count)) |
