Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 30:45554fd15511 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 26 May 2015 10:37:51 -0400 |
parents | 5ab17bdf2530 |
children | ce8bd23d0335 |
comparison
equal
deleted
inserted
replaced
29:5ab17bdf2530 | 30:45554fd15511 |
---|---|
134 } else { | 134 } else { |
135 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) | 135 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) |
136 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) | 136 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) |
137 } | 137 } |
138 | 138 |
139 scatterplot_data_columns = c("Patient", "Sample", "Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene") | 139 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge") |
140 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns]) | 140 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns]) |
141 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$Clone_Sequence),] | 141 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),] |
142 scatterplot_data$type = factor(x="In one", levels=c("In one", "In Both")) | 142 scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both")) |
143 scatterplot_data$on = onShort | 143 scatterplot_data$on = onShort |
144 | 144 |
145 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") | 145 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") |
146 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) | 146 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) |
147 res1 = vector() | 147 res1 = vector() |
157 threshhold = product[iter,threshholdIndex] | 157 threshhold = product[iter,threshholdIndex] |
158 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") | 158 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") |
159 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") | 159 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") |
160 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold | 160 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold |
161 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold | 161 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold |
162 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$merge)) | 162 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge)) |
163 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$merge)) | 163 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge)) |
164 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) | 164 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) |
165 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) | 165 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) |
166 res1 = append(res1, sum(one)) | 166 res1 = append(res1, sum(one)) |
167 res2 = append(res2, sum(two)) | 167 res2 = append(res2, sum(two)) |
168 resBoth = append(resBoth, sum(both)) | 168 resBoth = append(resBoth, sum(both)) |
185 } else { | 185 } else { |
186 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] | 186 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] |
187 if(nrow(scatterplot_locus_data) > 0){ | 187 if(nrow(scatterplot_locus_data) > 0){ |
188 scatterplot_locus_data$Rearrangement = product[iter, titleIndex] | 188 scatterplot_locus_data$Rearrangement = product[iter, titleIndex] |
189 } | 189 } |
190 in_two = (scatterplot_locus_data$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x) | 190 in_both = (scatterplot_locus_data$merge %in% patientMerge[both,]$merge) |
191 if(any(in_two)){ | 191 if(any(in_both)){ |
192 scatterplot_locus_data[in_two,]$type = "In Both" | 192 scatterplot_locus_data[in_both,]$type = "In Both" |
193 } | |
194 in_one = (scatterplot_locus_data$merge %in% patient1$merge) | |
195 not_in_one = !in_one | |
196 if(any(not_in_one)){ | |
197 scatterplot_locus_data[not_in_one,]$type = twoSample | |
193 } | 198 } |
194 if(type == "single"){ | 199 if(type == "single"){ |
195 single_patients <<- rbind(single_patients, scatterplot_locus_data) | 200 single_patients <<- rbind(single_patients, scatterplot_locus_data) |
196 } | 201 } |
197 p = NULL | 202 p = NULL |
198 if(nrow(scatterplot_locus_data) != 0){ | 203 if(nrow(scatterplot_locus_data) != 0){ |
199 if(on == "normalized_read_count"){ | 204 if(on == "normalized_read_count"){ |
200 scales = 10^(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) | 205 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) |
201 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) | 206 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000)) |
202 } else { | 207 } else { |
203 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) | 208 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) |
204 } | 209 } |
205 p = p + geom_point(aes(colour=type), position="jitter") | 210 p = p + geom_point(aes(colour=type), position="jitter") |
206 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex])) | 211 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex])) |
207 } else { | 212 } else { |
208 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex])) | 213 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex])) |
341 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) | 346 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony]) |
342 patientMerge23 = merge(patient2, patient3, by="merge") | 347 patientMerge23 = merge(patient2, patient3, by="merge") |
343 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) | 348 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) |
344 | 349 |
345 | 350 |
346 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene") | 351 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge") |
347 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns]) | 352 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns]) |
348 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$Clone_Sequence),] | 353 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),] |
349 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple")) | 354 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple")) |
350 | 355 |
351 res1 = vector() | 356 res1 = vector() |
352 res2 = vector() | 357 res2 = vector() |
353 res3 = vector() | 358 res3 = vector() |
367 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") | 372 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") |
368 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") | 373 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") |
369 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold) | 374 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold) |
370 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) | 375 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) |
371 | 376 |
372 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$Clone_Sequence.x %in% patientMerge[all,]$merge)) | 377 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge)) |
373 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$Clone_Sequence.x %in% patientMerge[all,]$merge)) | 378 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge)) |
374 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$Clone_Sequence.x %in% patientMerge[all,]$merge)) | 379 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge)) |
375 | 380 |
376 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient1$Clone_Sequence %in% patientMerge12[one_two,]$merge) & !(patient1$Clone_Sequence %in% patientMerge13[one_three,]$merge)) | 381 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge)) |
377 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient2$Clone_Sequence %in% patientMerge12[one_two,]$merge) & !(patient2$Clone_Sequence %in% patientMerge23[two_three,]$merge)) | 382 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge)) |
378 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient3$Clone_Sequence %in% patientMerge13[one_three,]$merge) & !(patient3$Clone_Sequence %in% patientMerge23[two_three,]$merge)) | 383 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge)) |
379 | 384 |
380 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x)) | 385 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x)) |
381 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y)) | 386 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y)) |
382 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z)) | 387 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z)) |
383 res1 = append(res1, sum(one)) | 388 res1 = append(res1, sum(one)) |
425 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="") | 430 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="") |
426 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 431 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
427 } | 432 } |
428 } else { #scatterplot data | 433 } else { #scatterplot data |
429 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] | 434 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] |
430 in_two = (scatterplot_locus_data$Clone_Sequence %in% patientMerge12[one_two,]$Clone_Sequence.x) | (scatterplot_locus_data$Clone_Sequence %in% patientMerge13[one_three,]$Clone_Sequence.x) | (scatterplot_locus_data$Clone_Sequence %in% patientMerge23[two_three,]$Clone_Sequence.x) | 435 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge) |
431 if(sum(in_two) > 0){ | 436 if(sum(in_two) > 0){ |
432 scatterplot_locus_data[in_two,]$type = "In two" | 437 scatterplot_locus_data[in_two,]$type = "In two" |
433 } | 438 } |
434 in_three = (scatterplot_locus_data$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x) | 439 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge) |
435 if(sum(in_three)> 0){ | 440 if(sum(in_three)> 0){ |
436 scatterplot_locus_data[in_three,]$type = "In three" | 441 scatterplot_locus_data[in_three,]$type = "In three" |
437 } | 442 } |
438 not_in_one = scatterplot_locus_data$type != "In one" | 443 not_in_one = scatterplot_locus_data$type != "In one" |
439 if(sum(not_in_one) > 0){ | 444 if(sum(not_in_one) > 0){ |