Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 29:5ab17bdf2530 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 22 May 2015 09:06:04 -0400 |
parents | a63ccc36f5a4 |
children | 45554fd15511 |
Comparison legend: equal | deleted | inserted | replaced
28:a63ccc36f5a4 | 29:5ab17bdf2530 |
---|---|
3 inFile = args[1] | 3 inFile = args[1] |
4 outDir = args[2] | 4 outDir = args[2] |
5 logfile = args[3] | 5 logfile = args[3] |
6 min_freq = as.numeric(args[4]) | 6 min_freq = as.numeric(args[4]) |
7 min_cells = as.numeric(args[5]) | 7 min_cells = as.numeric(args[5]) |
8 mergeOn = args[6] | |
8 | 9 |
9 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F) | 10 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F) |
10 | 11 |
11 library(ggplot2) | 12 library(ggplot2) |
12 library(reshape2) | 13 library(reshape2) |
49 | 50 |
50 dat = dat[dat$normalized_read_count >= min_cells,] | 51 dat = dat[dat$normalized_read_count >= min_cells,] |
51 | 52 |
52 dat$paste = paste(dat$Sample, dat$Clone_Sequence) | 53 dat$paste = paste(dat$Sample, dat$Clone_Sequence) |
53 | 54 |
55 cat("<tr><td>Adding duplicate V+J+CDR3 sequences</td></tr>", file=logfile, append=T) | |
56 #remove duplicate V+J+CDR3, add together numerical values | |
57 dat= data.frame(data.table(dat)[, list(Receptor=unique(.SD$Receptor), | |
58 Cell_Count=unique(.SD$Cell_Count), | |
59 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), | |
60 Total_Read_Count=sum(.SD$Total_Read_Count), | |
61 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0), | |
62 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone), | |
63 Frequency=sum(.SD$Frequency), | |
64 locus_V=unique(.SD$locus_V), | |
65 locus_J=unique(.SD$locus_J), | |
66 min_cell_count=unique(.SD$min_cell_count), | |
67 normalized_read_count=sum(.SD$normalized_read_count), | |
68 Log10_Frequency=sum(.SD$Log10_Frequency), | |
69 Clone_Sequence=.SD$Clone_Sequence[1], | |
70 min_cell_paste=.SD$min_cell_paste[1], | |
71 paste=unique(.SD$paste)), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")]) | |
72 | |
73 | |
54 patients = split(dat, dat$Patient, drop=T) | 74 patients = split(dat, dat$Patient, drop=T) |
55 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000)) | 75 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000)) |
56 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5)) | 76 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5)) |
57 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV") | 77 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV") |
58 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*") | 78 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*") |
59 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb") | 79 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb") |
60 Titles = factor(Titles, levels=Titles) | 80 Titles = factor(Titles, levels=Titles) |
61 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles)) | 81 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles)) |
82 | |
83 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0)) | |
62 | 84 |
63 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){ | 85 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){ |
64 if (!is.data.frame(x) & is.list(x)){ | 86 if (!is.data.frame(x) & is.list(x)){ |
65 x = x[[1]] | 87 x = x[[1]] |
66 } | 88 } |
104 if(appendtxt){ | 126 if(appendtxt){ |
105 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3) | 127 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3) |
106 } | 128 } |
107 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T) | 129 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T) |
108 | 130 |
109 #patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) | 131 if(mergeOn == "Clone_Sequence"){ |
110 #patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) | 132 patient1$merge = paste(patient1$Clone_Sequence) |
111 patient1$merge = paste(patient1$Clone_Sequence) | 133 patient2$merge = paste(patient2$Clone_Sequence) |
112 patient2$merge = paste(patient2$Clone_Sequence) | 134 } else { |
113 | 135 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) |
114 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") | 136 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) |
137 } | |
138 | |
139 scatterplot_data_columns = c("Patient", "Sample", "Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene") | |
140 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns]) | |
141 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$Clone_Sequence),] | |
142 scatterplot_data$type = factor(x="In one", levels=c("In one", "In Both")) | |
143 scatterplot_data$on = onShort | |
144 | |
115 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") | 145 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") |
116 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) | 146 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) |
117 res1 = vector() | 147 res1 = vector() |
118 res2 = vector() | 148 res2 = vector() |
119 resBoth = vector() | 149 resBoth = vector() |
120 read1Count = vector() | 150 read1Count = vector() |
121 read2Count = vector() | 151 read2Count = vector() |
122 locussum1 = vector() | 152 locussum1 = vector() |
123 locussum2 = vector() | 153 locussum2 = vector() |
124 | 154 |
125 print(patient) | |
126 #for(iter in 1){ | 155 #for(iter in 1){ |
127 for(iter in 1:length(product[,1])){ | 156 for(iter in 1:length(product[,1])){ |
128 threshhold = product[iter,threshholdIndex] | 157 threshhold = product[iter,threshholdIndex] |
129 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") | 158 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") |
130 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") | 159 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") |
131 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold | 160 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold |
132 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both higher than threshold | 161 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold |
133 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$merge)) | 162 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$merge)) |
134 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$merge)) | 163 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$merge)) |
135 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) | 164 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) |
136 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) | 165 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) |
137 res1 = append(res1, sum(one)) | 166 res1 = append(res1, sum(one)) |
151 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] | 180 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] |
152 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") | 181 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") |
153 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") | 182 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") |
154 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 183 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
155 } | 184 } |
185 } else { | |
186 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] | |
187 if(nrow(scatterplot_locus_data) > 0){ | |
188 scatterplot_locus_data$Rearrangement = product[iter, titleIndex] | |
189 } | |
190 in_two = (scatterplot_locus_data$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x) | |
191 if(any(in_two)){ | |
192 scatterplot_locus_data[in_two,]$type = "In Both" | |
193 } | |
194 if(type == "single"){ | |
195 single_patients <<- rbind(single_patients, scatterplot_locus_data) | |
196 } | |
197 p = NULL | |
198 if(nrow(scatterplot_locus_data) != 0){ | |
199 if(on == "normalized_read_count"){ | |
200 scales = 10^(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) | |
201 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) | |
202 } else { | |
203 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) | |
204 } | |
205 p = p + geom_point(aes(colour=type), position="jitter") | |
206 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex])) | |
207 } else { | |
208 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex])) | |
209 } | |
210 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep="")) | |
211 print(p) | |
212 dev.off() | |
156 } | 213 } |
157 if(sum(both) > 0){ | 214 if(sum(both) > 0){ |
158 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] | 215 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] |
159 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) | 216 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) |
160 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") | 217 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") |
161 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 218 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) |
162 } | 219 } |
163 } | 220 } |
164 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2) | 221 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2) |
165 if(sum(is.na(patientResult$percentage)) > 0){ | 222 if(sum(is.na(patientResult$percentage)) > 0){ |
166 patientResult[is.na(patientResult$percentage),]$percentage = 0 | 223 patientResult[is.na(patientResult$percentage),]$percentage = 0 |
167 } | 224 } |
213 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) | 270 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) |
214 | 271 |
215 interval = intervalFreq | 272 interval = intervalFreq |
216 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 273 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
217 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 274 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
218 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) | 275 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) |
219 | 276 |
220 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) | 277 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) |
221 | 278 |
222 interval = intervalReads | 279 interval = intervalReads |
223 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 280 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
224 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 281 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
225 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") | 282 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") |
226 | 283 |
227 cat("</table></html>", file=logfile, append=T) | 284 cat("</table></html>", file=logfile, append=T) |
228 | 285 |
229 | 286 scales = 10^(0:ceiling(log10(max(single_patients$normalized_read_count)))) |
287 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) | |
288 p = p + geom_point(aes(colour=type), position="jitter") | |
289 p = p + xlab("In one or both samples") + ylab("Reads") | |
290 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample") | |
291 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080) | |
292 print(p) | |
293 dev.off() | |
294 | |
295 p = ggplot(single_patients, aes(Rearrangement, Frequency)) | |
296 p = p + geom_point(aes(colour=type), position="jitter") | |
297 p = p + xlab("In one or both samples") + ylab("Frequency") | |
298 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample") | |
299 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080) | |
300 print(p) | |
301 dev.off() | |
230 | 302 |
231 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ | 303 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ |
232 onShort = "reads" | 304 onShort = "reads" |
233 if(on == "Frequency"){ | 305 if(on == "Frequency"){ |
234 onShort = "freq" | 306 onShort = "freq" |
246 patientIndex = which(colnames(patient1) == "Patient") | 318 patientIndex = which(colnames(patient1) == "Patient") |
247 oneSample = paste(patient1[1,sampleIndex], sep="") | 319 oneSample = paste(patient1[1,sampleIndex], sep="") |
248 twoSample = paste(patient2[1,sampleIndex], sep="") | 320 twoSample = paste(patient2[1,sampleIndex], sep="") |
249 threeSample = paste(patient3[1,sampleIndex], sep="") | 321 threeSample = paste(patient3[1,sampleIndex], sep="") |
250 | 322 |
251 #patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) | 323 if(mergeOn == "Clone_Sequence"){ |
252 #patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) | 324 patient1$merge = paste(patient1$Clone_Sequence) |
253 #patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence) | 325 patient2$merge = paste(patient2$Clone_Sequence) |
254 | 326 patient3$merge = paste(patient3$Clone_Sequence) |
255 patient1$merge = paste(patient1$Clone_Sequence) | 327 |
256 patient2$merge = paste(patient2$Clone_Sequence) | 328 } else { |
257 patient3$merge = paste(patient3$Clone_Sequence) | 329 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) |
330 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) | |
331 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence) | |
332 } | |
258 | 333 |
259 patientMerge = merge(patient1, patient2, by="merge") | 334 patientMerge = merge(patient1, patient2, by="merge") |
260 patientMerge = merge(patientMerge, patient3, by="merge") | 335 patientMerge = merge(patientMerge, patient3, by="merge") |
261 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="") | 336 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="") |
262 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) | 337 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) |
463 | 538 |
464 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste") | 539 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste") |
465 | 540 |
466 triplets = triplets[,!(colnames(triplets) %in% column_drops)] | 541 triplets = triplets[,!(colnames(triplets) %in% column_drops)] |
467 | 542 |
543 #remove duplicate V+J+CDR3, add together numerical values | |
544 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor), | |
545 Cell_Count=unique(.SD$Cell_Count), | |
546 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), | |
547 Total_Read_Count=sum(.SD$Total_Read_Count), | |
548 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0), | |
549 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone), | |
550 Frequency=sum(.SD$Frequency), | |
551 normalized_read_count=sum(.SD$normalized_read_count), | |
552 Log10_Frequency=sum(.SD$Log10_Frequency), | |
553 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")]) | |
554 | |
555 | |
468 interval = intervalReads | 556 interval = intervalReads |
469 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 557 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) |
470 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 558 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) |
471 | 559 |
472 one = triplets[triplets$Sample == "14696_reg_BM",] | 560 one = triplets[triplets$Sample == "14696_reg_BM",] |