| 0 | 1 args <- commandArgs(trailingOnly = TRUE) | 
|  | 2 | 
|  | 3 inFile = args[1] | 
|  | 4 outDir = args[2] | 
| 3 | 5 logfile = args[3] | 
|  | 6 min_freq = as.numeric(args[4]) | 
|  | 7 min_cells = as.numeric(args[5]) | 
|  | 8 | 
|  | 9 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F) | 
| 0 | 10 | 
| 4 | 11 library(ggplot2) | 
|  | 12 library(reshape2) | 
|  | 13 library(data.table) | 
|  | 14 library(grid) | 
|  | 15 library(parallel) | 
| 0 | 16 #require(xtable) | 
| 3 | 17 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T) | 
| 9 | 18 dat = read.table(inFile, header=T, sep="\t", dec=",", fill=T, stringsAsFactors=F) | 
|  | 19 dat = dat[!is.na(dat$Patient),] | 
|  | 20 | 
| 0 | 21 setwd(outDir) | 
| 3 | 22 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T) | 
| 2 | 23 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1))) | 
|  | 24 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1))) | 
|  | 25 | 
| 9 | 26 str(dat) | 
|  | 27 cat("<tr><td>Deduplication</td></tr>", file=logfile, append=T) | 
|  | 28 dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")]) | 
|  | 29 | 
| 3 | 30 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T) | 
| 0 | 31 dat$Frequency = ((10^dat$Log10_Frequency)*100) | 
| 2 | 32 | 
| 3 | 33 dat = dat[dat$Frequency >= min_freq,] | 
|  | 34 | 
|  | 35 cat("<tr><td>Normalizing cell count to 1.000.000</td></tr>", file=logfile, append=T) | 
| 2 | 36 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * 1000000 / 2) | 
| 3 | 37 dat = dat[dat$normalized_read_count >= min_cells,] | 
| 2 | 38 dat$paste = paste(dat$Sample, dat$V_Segment_Major_Gene, dat$J_Segment_Major_Gene, dat$CDR3_Sense_Sequence) | 
| 9 | 39 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),] | 
|  | 40 | 
| 0 | 41 patients = split(dat, dat$Patient, drop=T) | 
| 9 | 42 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000)) | 
| 6 | 43 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5)) | 
| 0 | 44 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV") | 
|  | 45 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*") | 
|  | 46 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb") | 
|  | 47 Titles = factor(Titles, levels=Titles) | 
|  | 48 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles)) | 
|  | 49 | 
|  | 50 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){ | 
|  | 51   x$Sample = factor(x$Sample, levels=unique(x$Sample)) | 
|  | 52   onShort = "reads" | 
|  | 53   if(on == "Frequency"){ | 
|  | 54     onShort = "freq" | 
|  | 55   } | 
|  | 56   splt = split(x, x$Sample, drop=T) | 
| 4 | 57   type="pair" | 
| 2 | 58   if(length(splt) == 1){ | 
| 3 | 59     print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample")) | 
| 4 | 60     splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0)) | 
|  | 61     type="single" | 
| 2 | 62   } | 
| 0 | 63   patient1 = splt[[1]] | 
|  | 64   patient2 = splt[[2]] | 
|  | 65 | 
|  | 66   threshholdIndex = which(colnames(product) == "interval") | 
|  | 67   V_SegmentIndex = which(colnames(product) == "V_Segments") | 
|  | 68   J_SegmentIndex = which(colnames(product) == "J_Segments") | 
|  | 69   titleIndex = which(colnames(product) == "Titles") | 
|  | 70   sampleIndex = which(colnames(x) == "Sample") | 
|  | 71   patientIndex = which(colnames(x) == "Patient") | 
|  | 72   oneSample = paste(patient1[1,sampleIndex], sep="") | 
|  | 73   twoSample = paste(patient2[1,sampleIndex], sep="") | 
|  | 74   patient = paste(x[1,patientIndex]) | 
| 3 | 75 | 
| 0 | 76   switched = F | 
|  | 77   if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){ | 
|  | 78     tmp = twoSample | 
|  | 79     twoSample = oneSample | 
|  | 80     oneSample = tmp | 
|  | 81     tmp = patient1 | 
|  | 82     patient1 = patient2 | 
|  | 83     patient2 = tmp | 
|  | 84     switched = T | 
|  | 85   } | 
|  | 86   if(appendtxt){ | 
| 4 | 87     cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3) | 
| 0 | 88   } | 
| 3 | 89   cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T) | 
| 9 | 90 | 
|  | 91   patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) | 
|  | 92   patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) | 
|  | 93 | 
|  | 94   patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") | 
| 0 | 95   res1 = vector() | 
|  | 96   res2 = vector() | 
|  | 97   resBoth = vector() | 
|  | 98   read1Count = vector() | 
|  | 99   read2Count = vector() | 
| 2 | 100   locussum1 = vector() | 
|  | 101   locussum2 = vector() | 
| 9 | 102 | 
|  | 103   print(patient) | 
| 0 | 104   #for(iter in 1){ | 
|  | 105   for(iter in 1:length(product[,1])){ | 
|  | 106     threshhold = product[iter,threshholdIndex] | 
|  | 107     V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") | 
|  | 108     J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") | 
|  | 109     both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold) | 
| 10 | 110     one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$CDR3_Sense_Sequence %in% patientMerge[both,]$CDR3_Sense_Sequence.x)) | 
|  | 111     two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$CDR3_Sense_Sequence %in% patientMerge[both,]$CDR3_Sense_Sequence.x)) | 
| 2 | 112     read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[both,]$normalized_read_count.x)) | 
|  | 113     read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[both,]$normalized_read_count.y)) | 
| 0 | 114     res1 = append(res1, sum(one)) | 
| 2 | 115     res2 = append(res2, sum(two)) | 
| 0 | 116     resBoth = append(resBoth, sum(both)) | 
| 2 | 117     locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count)) | 
|  | 118     locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count)) | 
| 0 | 119     #threshhold = 0 | 
|  | 120     if(threshhold != 0){ | 
|  | 121       if(sum(one) > 0){ | 
| 9 | 122         dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] | 
|  | 123         colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "CDR3 Sequence", "Related_to_leukemia_clone") | 
| 0 | 124         filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="") | 
|  | 125         write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 
|  | 126       } | 
|  | 127       if(sum(two) > 0){ | 
| 9 | 128         dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] | 
|  | 129         colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "CDR3 Sequence", "Related_to_leukemia_clone") | 
| 0 | 130         filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") | 
|  | 131         write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 
|  | 132       } | 
|  | 133     } | 
|  | 134     if(sum(both) > 0){ | 
| 9 | 135       dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "CDR3_Sense_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")] | 
|  | 136       colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"CDR3 Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample)) | 
| 0 | 137       filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="") | 
|  | 138       write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 
|  | 139     } | 
|  | 140   } | 
| 2 | 141   patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2) | 
| 0 | 142   if(sum(is.na(patientResult$percentage)) > 0){ | 
|  | 143     patientResult[is.na(patientResult$percentage),]$percentage = 0 | 
|  | 144   } | 
|  | 145   colnames(patientResult)[6] = oneSample | 
|  | 146   colnames(patientResult)[8] = twoSample | 
|  | 147   colnamesBak = colnames(patientResult) | 
| 2 | 148   colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample)) | 
| 0 | 149   write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 
|  | 150   colnames(patientResult) = colnamesBak | 
|  | 151 | 
|  | 152   patientResult$Locus = factor(patientResult$Locus, Titles) | 
|  | 153   patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep="")) | 
|  | 154 | 
|  | 155   plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")]) | 
|  | 156   plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a") | 
|  | 157   plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) | 
|  | 158   plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0) | 
|  | 159   plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both") | 
|  | 160   plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines")) | 
|  | 161   png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080) | 
|  | 162   print(plt) | 
|  | 163   dev.off() | 
|  | 164   #(t,r,b,l) | 
|  | 165   plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")]) | 
|  | 166   plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a") | 
|  | 167   plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) | 
|  | 168   plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0) | 
|  | 169   plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right") | 
|  | 170   plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines")) | 
|  | 171   png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080) | 
|  | 172   print(plt) | 
|  | 173   dev.off() | 
|  | 174 | 
|  | 175   patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2) | 
|  | 176   patientResult$relativeValue = patientResult$value * 10 | 
|  | 177   patientResult[patientResult$relativeValue == 0,]$relativeValue = 1 | 
|  | 178   plt = ggplot(patientResult) | 
|  | 179   plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge") | 
|  | 180   plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) | 
|  | 181   plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9))) | 
|  | 182   plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2) | 
|  | 183   plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8) | 
|  | 184   plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep="")) | 
|  | 185   png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080) | 
|  | 186   print(plt) | 
|  | 187   dev.off() | 
|  | 188 } | 
|  | 189 | 
| 3 | 190 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T) | 
|  | 191 | 
| 0 | 192 interval = intervalFreq | 
|  | 193 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 
| 4 | 194 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 
|  | 195 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T) | 
| 0 | 196 | 
| 3 | 197 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T) | 
|  | 198 | 
| 0 | 199 interval = intervalReads | 
|  | 200 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 
| 4 | 201 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 
| 9 | 202 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count") | 
| 0 | 203 | 
| 3 | 204 cat("</table></html>", file=logfile, append=T) | 
|  | 205 | 
| 7 | 206 | 
|  | 207 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){ | 
|  | 208   onShort = "reads" | 
|  | 209   if(on == "Frequency"){ | 
|  | 210     onShort = "freq" | 
|  | 211   } | 
|  | 212   type="triplet" | 
|  | 213 | 
|  | 214   threshholdIndex = which(colnames(product) == "interval") | 
|  | 215   V_SegmentIndex = which(colnames(product) == "V_Segments") | 
|  | 216   J_SegmentIndex = which(colnames(product) == "J_Segments") | 
|  | 217   titleIndex = which(colnames(product) == "Titles") | 
|  | 218   sampleIndex = which(colnames(patient1) == "Sample") | 
|  | 219   patientIndex = which(colnames(patient1) == "Patient") | 
|  | 220   oneSample = paste(patient1[1,sampleIndex], sep="") | 
|  | 221   twoSample = paste(patient2[1,sampleIndex], sep="") | 
|  | 222   threeSample = paste(patient3[1,sampleIndex], sep="") | 
|  | 223 | 
| 9 | 224   patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence) | 
|  | 225   patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) | 
|  | 226   patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence) | 
|  | 227 | 
|  | 228   patientMerge = merge(patient1, patient2, by="merge") | 
|  | 229   patientMerge = merge(patientMerge, patient3, by="merge") | 
|  | 230   colnames(patientMerge)[28:length(colnames(patientMerge))] = paste(colnames(patientMerge)[28:length(colnames(patientMerge))], ".z", sep="") | 
| 7 | 231   res1 = vector() | 
|  | 232   res2 = vector() | 
|  | 233   res3 = vector() | 
|  | 234   resAll = vector() | 
|  | 235   read1Count = vector() | 
|  | 236   read2Count = vector() | 
|  | 237   read3Count = vector() | 
|  | 238 | 
|  | 239   if(appendTriplets){ | 
| 9 | 240     cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3) | 
| 7 | 241   } | 
|  | 242   for(iter in 1:length(product[,1])){ | 
|  | 243     threshhold = product[iter,threshholdIndex] | 
|  | 244     V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="") | 
|  | 245     J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") | 
|  | 246     all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,paste(on, ".x", sep="")] > threshhold & patientMerge[,paste(on, ".y", sep="")] > threshhold & patientMerge[,paste(on, ".z", sep="")] > threshhold) | 
| 10 | 247     one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x)) | 
|  | 248     two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x)) | 
|  | 249     three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$CDR3_Sense_Sequence %in% patientMerge[all,]$CDR3_Sense_Sequence.x)) | 
| 7 | 250 | 
|  | 251     read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x)) | 
|  | 252     read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y)) | 
|  | 253     read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z)) | 
|  | 254     res1 = append(res1, sum(one)) | 
|  | 255     res2 = append(res2, sum(two)) | 
|  | 256     res3 = append(res3, sum(three)) | 
|  | 257     resAll = append(resAll, sum(all)) | 
|  | 258     #threshhold = 0 | 
|  | 259     if(threshhold != 0){ | 
|  | 260       if(sum(one) > 0){ | 
| 9 | 261         dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] | 
| 7 | 262         colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") | 
|  | 263         filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="") | 
|  | 264         write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 
|  | 265       } | 
|  | 266       if(sum(two) > 0){ | 
| 9 | 267         dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] | 
| 7 | 268         colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") | 
|  | 269         filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="") | 
|  | 270         write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 
|  | 271       } | 
|  | 272       if(sum(three) > 0){ | 
| 9 | 273         dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")] | 
| 7 | 274         colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone") | 
|  | 275         filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="") | 
|  | 276         write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 
|  | 277       } | 
|  | 278     } | 
|  | 279     if(sum(all) > 0){ | 
| 9 | 280       dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "CDR3_Sense_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")] | 
|  | 281       colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"CDR3_Sense_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample)) | 
| 7 | 282       filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="") | 
|  | 283       write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 
|  | 284     } | 
|  | 285   } | 
|  | 286   patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count)) | 
|  | 287   colnames(patientResult)[6] = oneSample | 
|  | 288   colnames(patientResult)[8] = twoSample | 
|  | 289   colnames(patientResult)[10] = threeSample | 
|  | 290 | 
|  | 291   colnamesBak = colnames(patientResult) | 
|  | 292   colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample), paste("Normalized Read Count", twoSample), paste("Number of sequences", threeSample), paste("Normalized Read Count", threeSample)) | 
|  | 293   write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T) | 
|  | 294   colnames(patientResult) = colnamesBak | 
|  | 295 | 
|  | 296   patientResult$Locus = factor(patientResult$Locus, Titles) | 
|  | 297   patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep="")) | 
|  | 298 | 
|  | 299   plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")]) | 
|  | 300   plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a") | 
|  | 301   plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) | 
|  | 302   plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0) | 
|  | 303   plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All") | 
|  | 304   plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines")) | 
|  | 305   png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080) | 
|  | 306   print(plt) | 
|  | 307   dev.off() | 
|  | 308 | 
|  | 309   fontSize = 4 | 
|  | 310 | 
|  | 311   bak = patientResult | 
|  | 312   patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2) | 
|  | 313   patientResult$relativeValue = patientResult$value * 10 | 
|  | 314   patientResult[patientResult$relativeValue == 0,]$relativeValue = 1 | 
|  | 315   plt = ggplot(patientResult) | 
|  | 316   plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge") | 
|  | 317   plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1)) | 
|  | 318   plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9))) | 
|  | 319   plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize) | 
|  | 320   plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize) | 
|  | 321   plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize) | 
|  | 322   plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample") | 
|  | 323   png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080) | 
|  | 324   print(plt) | 
|  | 325   dev.off() | 
|  | 326 } | 
|  | 327 | 
| 9 | 328 | 
|  | 329 triplets$uniqueID = "ID" | 
|  | 330 | 
|  | 331 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | 
|  | 332 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | 
|  | 333 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left" | 
|  | 334 | 
|  | 335 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | 
|  | 336 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | 
|  | 337 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right" | 
|  | 338 | 
|  | 339 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696" | 
|  | 340 | 
|  | 341 triplets = data.frame(data.table(triplets)[, list(Patient=unique(.SD$uniqueID), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")]) | 
|  | 342 | 
|  | 343 triplets$Frequency = (10^as.numeric(triplets$Log10_Frequency))*100 | 
|  | 344 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * 1000000 / 2) | 
|  | 345 | 
| 7 | 346 interval = intervalReads | 
|  | 347 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 
|  | 348 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 
|  | 349 | 
| 9 | 350 one = triplets[triplets$Sample == "14696_reg_BM",] | 
|  | 351 two = triplets[triplets$Sample == "24536_reg_BM",] | 
|  | 352 three = triplets[triplets$Sample == "24062_reg_BM",] | 
| 8 | 353 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T) | 
| 7 | 354 | 
| 9 | 355 one = triplets[triplets$Sample == "16278_Left",] | 
|  | 356 two = triplets[triplets$Sample == "26402_Left",] | 
|  | 357 three = triplets[triplets$Sample == "26759_Left",] | 
| 8 | 358 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T) | 
| 7 | 359 | 
| 9 | 360 one = triplets[triplets$Sample == "16278_Right",] | 
|  | 361 two = triplets[triplets$Sample == "26402_Right",] | 
|  | 362 three = triplets[triplets$Sample == "26759_Right",] | 
| 8 | 363 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T) | 
| 7 | 364 | 
|  | 365 | 
|  | 366 interval = intervalFreq | 
|  | 367 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval)) | 
|  | 368 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval))) | 
|  | 369 | 
| 9 | 370 one = triplets[triplets$Sample == "14696_reg_BM",] | 
|  | 371 two = triplets[triplets$Sample == "24536_reg_BM",] | 
|  | 372 three = triplets[triplets$Sample == "24062_reg_BM",] | 
| 8 | 373 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F) | 
| 7 | 374 | 
| 9 | 375 one = triplets[triplets$Sample == "16278_Left",] | 
|  | 376 two = triplets[triplets$Sample == "26402_Left",] | 
|  | 377 three = triplets[triplets$Sample == "26759_Left",] | 
| 8 | 378 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F) | 
| 7 | 379 | 
| 9 | 380 one = triplets[triplets$Sample == "16278_Right",] | 
|  | 381 two = triplets[triplets$Sample == "26402_Right",] | 
|  | 382 three = triplets[triplets$Sample == "26759_Right",] | 
| 8 | 383 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F) |