comparison RScript.r @ 29:5ab17bdf2530 draft

Uploaded
author davidvanzessen
date Fri, 22 May 2015 09:06:04 -0400
parents a63ccc36f5a4
children 45554fd15511
comparison
equal deleted inserted replaced
28:a63ccc36f5a4 29:5ab17bdf2530
3 inFile = args[1] 3 inFile = args[1]
4 outDir = args[2] 4 outDir = args[2]
5 logfile = args[3] 5 logfile = args[3]
6 min_freq = as.numeric(args[4]) 6 min_freq = as.numeric(args[4])
7 min_cells = as.numeric(args[5]) 7 min_cells = as.numeric(args[5])
8 mergeOn = args[6]
8 9
9 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F) 10 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
10 11
11 library(ggplot2) 12 library(ggplot2)
12 library(reshape2) 13 library(reshape2)
49 50
50 dat = dat[dat$normalized_read_count >= min_cells,] 51 dat = dat[dat$normalized_read_count >= min_cells,]
51 52
52 dat$paste = paste(dat$Sample, dat$Clone_Sequence) 53 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
53 54
55 cat("<tr><td>Adding duplicate V+J+CDR3 sequences</td></tr>", file=logfile, append=T)
56 #remove duplicate V+J+CDR3, add together numerical values
57 dat= data.frame(data.table(dat)[, list(Receptor=unique(.SD$Receptor),
58 Cell_Count=unique(.SD$Cell_Count),
59 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
60 Total_Read_Count=sum(.SD$Total_Read_Count),
61 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
62 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
63 Frequency=sum(.SD$Frequency),
64 locus_V=unique(.SD$locus_V),
65 locus_J=unique(.SD$locus_J),
66 min_cell_count=unique(.SD$min_cell_count),
67 normalized_read_count=sum(.SD$normalized_read_count),
68 Log10_Frequency=sum(.SD$Log10_Frequency),
69 Clone_Sequence=.SD$Clone_Sequence[1],
70 min_cell_paste=.SD$min_cell_paste[1],
71 paste=unique(.SD$paste)), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
72
73
54 patients = split(dat, dat$Patient, drop=T) 74 patients = split(dat, dat$Patient, drop=T)
55 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000)) 75 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
56 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5)) 76 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
57 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV") 77 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
58 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*") 78 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
59 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb") 79 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
60 Titles = factor(Titles, levels=Titles) 80 Titles = factor(Titles, levels=Titles)
61 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles)) 81 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
82
83 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
62 84
63 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){ 85 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
64 if (!is.data.frame(x) & is.list(x)){ 86 if (!is.data.frame(x) & is.list(x)){
65 x = x[[1]] 87 x = x[[1]]
66 } 88 }
104 if(appendtxt){ 126 if(appendtxt){
105 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3) 127 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
106 } 128 }
107 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T) 129 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
108 130
109 #patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) 131 if(mergeOn == "Clone_Sequence"){
110 #patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) 132 patient1$merge = paste(patient1$Clone_Sequence)
111 patient1$merge = paste(patient1$Clone_Sequence) 133 patient2$merge = paste(patient2$Clone_Sequence)
112 patient2$merge = paste(patient2$Clone_Sequence) 134 } else {
113 135 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
114 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") 136 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
137 }
138
139 scatterplot_data_columns = c("Patient", "Sample", "Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene")
140 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
141 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$Clone_Sequence),]
142 scatterplot_data$type = factor(x="In one", levels=c("In one", "In Both"))
143 scatterplot_data$on = onShort
144
115 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") 145 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
116 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) 146 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
117 res1 = vector() 147 res1 = vector()
118 res2 = vector() 148 res2 = vector()
119 resBoth = vector() 149 resBoth = vector()
120 read1Count = vector() 150 read1Count = vector()
121 read2Count = vector() 151 read2Count = vector()
122 locussum1 = vector() 152 locussum1 = vector()
123 locussum2 = vector() 153 locussum2 = vector()
124 154
125 print(patient)
126 #for(iter in 1){ 155 #for(iter in 1){
127 for(iter in 1:length(product[,1])){ 156 for(iter in 1:length(product[,1])){
128 threshhold = product[iter,threshholdIndex] 157 threshhold = product[iter,threshholdIndex]
129 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") 158 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
130 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") 159 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
131 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold 160 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
132 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both higher than threshold 161 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
133 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$merge)) 162 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$merge))
134 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$merge)) 163 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$merge))
135 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) 164 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
136 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) 165 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
137 res1 = append(res1, sum(one)) 166 res1 = append(res1, sum(one))
151 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")] 180 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
152 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone") 181 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
153 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") 182 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
154 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 183 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
155 } 184 }
185 } else {
186 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
187 if(nrow(scatterplot_locus_data) > 0){
188 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
189 }
190 in_two = (scatterplot_locus_data$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x)
191 if(any(in_two)){
192 scatterplot_locus_data[in_two,]$type = "In Both"
193 }
194 if(type == "single"){
195 single_patients <<- rbind(single_patients, scatterplot_locus_data)
196 }
197 p = NULL
198 if(nrow(scatterplot_locus_data) != 0){
199 if(on == "normalized_read_count"){
200 scales = 10^(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
201 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales)
202 } else {
203 p = ggplot(scatterplot_locus_data, aes(type, Frequency))
204 }
205 p = p + geom_point(aes(colour=type), position="jitter")
206 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
207 } else {
208 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
209 }
210 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
211 print(p)
212 dev.off()
156 } 213 }
157 if(sum(both) > 0){ 214 if(sum(both) > 0){
158 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] 215 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
159 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) 216 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
160 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") 217 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
161 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) 218 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
162 } 219 }
163 } 220 }
164 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2) 221 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
165 if(sum(is.na(patientResult$percentage)) > 0){ 222 if(sum(is.na(patientResult$percentage)) > 0){
166 patientResult[is.na(patientResult$percentage),]$percentage = 0 223 patientResult[is.na(patientResult$percentage),]$percentage = 0
167 } 224 }
213 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) 270 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
214 271
215 interval = intervalFreq 272 interval = intervalFreq
216 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) 273 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
217 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) 274 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
218 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) 275 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
219 276
220 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) 277 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
221 278
222 interval = intervalReads 279 interval = intervalReads
223 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) 280 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
224 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) 281 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
225 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") 282 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
226 283
227 cat("</table></html>", file=logfile, append=T) 284 cat("</table></html>", file=logfile, append=T)
228 285
229 286 scales = 10^(0:ceiling(log10(max(single_patients$normalized_read_count))))
287 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales)
288 p = p + geom_point(aes(colour=type), position="jitter")
289 p = p + xlab("In one or both samples") + ylab("Reads")
290 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
291 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080)
292 print(p)
293 dev.off()
294
295 p = ggplot(single_patients, aes(Rearrangement, Frequency))
296 p = p + geom_point(aes(colour=type), position="jitter")
297 p = p + xlab("In one or both samples") + ylab("Frequency")
298 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
299 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080)
300 print(p)
301 dev.off()
230 302
231 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ 303 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
232 onShort = "reads" 304 onShort = "reads"
233 if(on == "Frequency"){ 305 if(on == "Frequency"){
234 onShort = "freq" 306 onShort = "freq"
246 patientIndex = which(colnames(patient1) == "Patient") 318 patientIndex = which(colnames(patient1) == "Patient")
247 oneSample = paste(patient1[1,sampleIndex], sep="") 319 oneSample = paste(patient1[1,sampleIndex], sep="")
248 twoSample = paste(patient2[1,sampleIndex], sep="") 320 twoSample = paste(patient2[1,sampleIndex], sep="")
249 threeSample = paste(patient3[1,sampleIndex], sep="") 321 threeSample = paste(patient3[1,sampleIndex], sep="")
250 322
251 #patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) 323 if(mergeOn == "Clone_Sequence"){
252 #patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) 324 patient1$merge = paste(patient1$Clone_Sequence)
253 #patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence) 325 patient2$merge = paste(patient2$Clone_Sequence)
254 326 patient3$merge = paste(patient3$Clone_Sequence)
255 patient1$merge = paste(patient1$Clone_Sequence) 327
256 patient2$merge = paste(patient2$Clone_Sequence) 328 } else {
257 patient3$merge = paste(patient3$Clone_Sequence) 329 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
330 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
331 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
332 }
258 333
259 patientMerge = merge(patient1, patient2, by="merge") 334 patientMerge = merge(patient1, patient2, by="merge")
260 patientMerge = merge(patientMerge, patient3, by="merge") 335 patientMerge = merge(patientMerge, patient3, by="merge")
261 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="") 336 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
262 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz]) 337 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
463 538
464 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste") 539 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
465 540
466 triplets = triplets[,!(colnames(triplets) %in% column_drops)] 541 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
467 542
543 #remove duplicate V+J+CDR3, add together numerical values
544 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor),
545 Cell_Count=unique(.SD$Cell_Count),
546 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
547 Total_Read_Count=sum(.SD$Total_Read_Count),
548 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
549 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
550 Frequency=sum(.SD$Frequency),
551 normalized_read_count=sum(.SD$normalized_read_count),
552 Log10_Frequency=sum(.SD$Log10_Frequency),
553 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
554
555
468 interval = intervalReads 556 interval = intervalReads
469 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) 557 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
470 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) 558 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
471 559
472 one = triplets[triplets$Sample == "14696_reg_BM",] 560 one = triplets[triplets$Sample == "14696_reg_BM",]