| 
0
 | 
     1 args <- commandArgs(trailingOnly = TRUE)
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 inFile = args[1]
 | 
| 
 | 
     4 outDir = args[2]
 | 
| 
3
 | 
     5 logfile = args[3]
 | 
| 
 | 
     6 min_freq = as.numeric(args[4])
 | 
| 
 | 
     7 min_cells = as.numeric(args[5])
 | 
| 
29
 | 
     8 mergeOn = args[6]
 | 
| 
3
 | 
     9 
 | 
| 
 | 
    10 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
 | 
| 
0
 | 
    11 
 | 
| 
4
 | 
    12 library(ggplot2)
 | 
| 
 | 
    13 library(reshape2)
 | 
| 
 | 
    14 library(data.table)
 | 
| 
 | 
    15 library(grid)
 | 
| 
 | 
    16 library(parallel)
 | 
| 
0
 | 
    17 #require(xtable)
 | 
| 
3
 | 
    18 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
 | 
| 
13
 | 
    19 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
 | 
| 
34
 | 
    20 dat = dat[,c("Patient",  "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Related_to_leukemia_clone", "Clone_Sequence")]
 | 
| 
 | 
    21 dat$dsPerM = 0
 | 
| 
9
 | 
    22 dat = dat[!is.na(dat$Patient),]
 | 
| 
13
 | 
    23 dat$Related_to_leukemia_clone = F
 | 
| 
9
 | 
    24 
 | 
| 
0
 | 
    25 setwd(outDir)
 | 
| 
3
 | 
    26 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
 | 
| 
2
 | 
    27 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
 | 
| 
 | 
    28 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
 | 
| 
 | 
    29 
 | 
| 
3
 | 
    30 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
 | 
| 
12
 | 
    31 
 | 
| 
13
 | 
    32 dat$Frequency = ((10^dat$Log10_Frequency)*100)
 | 
| 
2
 | 
    33 
 | 
| 
3
 | 
    34 dat = dat[dat$Frequency >= min_freq,]
 | 
| 
 | 
    35 
 | 
| 
13
 | 
    36 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
 | 
| 
 | 
    37 
 | 
| 
 | 
    38 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
 | 
| 
 | 
    39 
 | 
| 
 | 
    40 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
 | 
| 
 | 
    41 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
 | 
| 
 | 
    42 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
 | 
| 
 | 
    43 
 | 
| 
 | 
    44 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
 | 
| 
 | 
    45 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
 | 
| 
 | 
    46 
 | 
| 
 | 
    47 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
 | 
| 
34
 | 
    48 print(paste("rows:", nrow(dat)))
 | 
| 
13
 | 
    49 dat = merge(dat, min_cell_count, by="min_cell_paste")
 | 
| 
34
 | 
    50 print(paste("rows:", nrow(dat)))
 | 
| 
13
 | 
    51 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
 | 
| 
 | 
    52 
 | 
| 
3
 | 
    53 dat = dat[dat$normalized_read_count >= min_cells,]
 | 
| 
13
 | 
    54 
 | 
| 
 | 
    55 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
 | 
| 
9
 | 
    56 
 | 
| 
22
 | 
    57 patients = split(dat, dat$Patient, drop=T)
 | 
| 
9
 | 
    58 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
 | 
| 
6
 | 
    59 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
 | 
| 
0
 | 
    60 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
 | 
| 
 | 
    61 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
 | 
| 
 | 
    62 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
 | 
| 
 | 
    63 Titles = factor(Titles, levels=Titles)
 | 
| 
 | 
    64 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
 | 
| 
 | 
    65 
 | 
| 
29
 | 
    66 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
 | 
| 
 | 
    67 
 | 
| 
51
 | 
    68 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
 | 
| 
 | 
    69 patient.merge.list.second = list()
 | 
| 
56
 | 
    70 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T)
 | 
| 
 | 
    71 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T)
 | 
| 
0
 | 
    72 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
 | 
| 
28
 | 
    73   if (!is.data.frame(x) & is.list(x)){
 | 
| 
 | 
    74     x = x[[1]]
 | 
| 
 | 
    75   }
 | 
| 
34
 | 
    76   #x$Sample = factor(x$Sample, levels=unique(x$Sample))
 | 
| 
 | 
    77   x = data.frame(x,stringsAsFactors=F)
 | 
| 
0
 | 
    78   onShort = "reads"
 | 
| 
 | 
    79   if(on == "Frequency"){
 | 
| 
 | 
    80     onShort = "freq"
 | 
| 
 | 
    81   }
 | 
| 
18
 | 
    82   onx = paste(on, ".x", sep="")
 | 
| 
 | 
    83   ony = paste(on, ".y", sep="")
 | 
| 
0
 | 
    84   splt = split(x, x$Sample, drop=T)
 | 
| 
4
 | 
    85   type="pair"
 | 
| 
2
 | 
    86   if(length(splt) == 1){
 | 
| 
3
 | 
    87     print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
 | 
| 
4
 | 
    88     splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
 | 
| 
 | 
    89     type="single"
 | 
| 
2
 | 
    90   }
 | 
| 
0
 | 
    91   patient1 = splt[[1]]
 | 
| 
 | 
    92   patient2 = splt[[2]]
 | 
| 
 | 
    93   
 | 
| 
 | 
    94   threshholdIndex = which(colnames(product) == "interval")
 | 
| 
 | 
    95   V_SegmentIndex = which(colnames(product) == "V_Segments")
 | 
| 
 | 
    96   J_SegmentIndex = which(colnames(product) == "J_Segments")
 | 
| 
 | 
    97   titleIndex = which(colnames(product) == "Titles")
 | 
| 
 | 
    98   sampleIndex = which(colnames(x) == "Sample")
 | 
| 
 | 
    99   patientIndex = which(colnames(x) == "Patient")
 | 
| 
 | 
   100   oneSample = paste(patient1[1,sampleIndex], sep="")
 | 
| 
 | 
   101   twoSample = paste(patient2[1,sampleIndex], sep="")
 | 
| 
 | 
   102   patient = paste(x[1,patientIndex])
 | 
| 
3
 | 
   103 
 | 
| 
0
 | 
   104   switched = F
 | 
| 
 | 
   105   if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
 | 
| 
 | 
   106     tmp = twoSample
 | 
| 
 | 
   107     twoSample = oneSample
 | 
| 
 | 
   108     oneSample = tmp
 | 
| 
 | 
   109     tmp = patient1
 | 
| 
 | 
   110     patient1 = patient2
 | 
| 
 | 
   111     patient2 = tmp
 | 
| 
 | 
   112     switched = T
 | 
| 
 | 
   113   }
 | 
| 
 | 
   114   if(appendtxt){
 | 
| 
4
 | 
   115     cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
 | 
| 
0
 | 
   116   }
 | 
| 
51
 | 
   117   cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
 | 
| 
9
 | 
   118   
 | 
| 
29
 | 
   119   if(mergeOn == "Clone_Sequence"){
 | 
| 
 | 
   120     patient1$merge = paste(patient1$Clone_Sequence)
 | 
| 
 | 
   121     patient2$merge = paste(patient2$Clone_Sequence)
 | 
| 
 | 
   122   } else {
 | 
| 
 | 
   123     patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
 | 
| 
 | 
   124     patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
 | 
| 
 | 
   125   }
 | 
| 
9
 | 
   126   
 | 
| 
30
 | 
   127   scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
 | 
| 
29
 | 
   128   scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
 | 
| 
30
 | 
   129   scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
 | 
| 
 | 
   130   scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
 | 
| 
29
 | 
   131   scatterplot_data$on = onShort
 | 
| 
 | 
   132   
 | 
| 
49
 | 
   133   #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy'
 | 
| 
 | 
   134   patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
 | 
| 
 | 
   135 
 | 
| 
 | 
   136   cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
 | 
| 
 | 
   137 
 | 
| 
51
 | 
   138   start.time = proc.time()
 | 
| 
 | 
   139   merge.list = c()
 | 
| 
 | 
   140 
 | 
| 
 | 
   141   if(patient %in% names(patient.merge.list)){
 | 
| 
 | 
   142     patientMerge = patient.merge.list[[patient]]
 | 
| 
 | 
   143     merge.list[["second"]] = patient.merge.list.second[[patient]]
 | 
| 
 | 
   144     cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
 | 
| 
 | 
   145 
 | 
| 
 | 
   146     print(names(patient.merge.list))
 | 
| 
 | 
   147   } else {
 | 
| 
 | 
   148     #fuzzy matching here...
 | 
| 
49
 | 
   149     #merge.list = patientMerge$merge
 | 
| 
51
 | 
   150 
 | 
| 
49
 | 
   151     #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
 | 
| 
 | 
   152     #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
 | 
| 
 | 
   153 
 | 
| 
 | 
   154     patient1.fuzzy = patient1
 | 
| 
 | 
   155     patient2.fuzzy = patient2
 | 
| 
 | 
   156 
 | 
| 
36
 | 
   157     #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
 | 
| 
 | 
   158     #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
 | 
| 
51
 | 
   159 
 | 
| 
48
 | 
   160     #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
 | 
| 
 | 
   161     #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
 | 
| 
51
 | 
   162 
 | 
| 
48
 | 
   163     patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
 | 
| 
 | 
   164     patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
 | 
| 
51
 | 
   165 
 | 
| 
49
 | 
   166     #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove?
 | 
| 
 | 
   167     #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
 | 
| 
51
 | 
   168 
 | 
| 
49
 | 
   169     #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
 | 
| 
 | 
   170     #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
 | 
| 
51
 | 
   171 
 | 
| 
37
 | 
   172     patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
 | 
| 
 | 
   173     patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
 | 
| 
49
 | 
   174 
 | 
| 
 | 
   175     merge.list = list()
 | 
| 
 | 
   176 
 | 
| 
 | 
   177     merge.list[["second"]] = vector()
 | 
| 
 | 
   178 
 | 
| 
37
 | 
   179     while(nrow(patient.fuzzy) > 1){
 | 
| 
 | 
   180       first.merge = patient.fuzzy[1,"merge"]
 | 
| 
 | 
   181       first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
 | 
| 
49
 | 
   182       first.sample = patient.fuzzy[1,"Sample"]
 | 
| 
37
 | 
   183       merge.filter = first.merge == patient.fuzzy$merge
 | 
| 
51
 | 
   184 
 | 
| 
49
 | 
   185       #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
 | 
| 
51
 | 
   186 
 | 
| 
49
 | 
   187       first.sample.filter = first.sample == patient.fuzzy$Sample
 | 
| 
 | 
   188       second.sample.filter = first.sample != patient.fuzzy$Sample
 | 
| 
 | 
   189 
 | 
| 
 | 
   190       #first match same sample, sum to a single row, same for other sample
 | 
| 
 | 
   191       #then merge rows like 'normal'
 | 
| 
51
 | 
   192 
 | 
| 
48
 | 
   193       sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
 | 
| 
49
 | 
   194 
 | 
| 
 | 
   195 
 | 
| 
 | 
   196 
 | 
| 
44
 | 
   197       #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
 | 
| 
49
 | 
   198       first.match.filter = merge.filter & sequence.filter & first.sample.filter
 | 
| 
 | 
   199       second.match.filter = merge.filter & sequence.filter & second.sample.filter
 | 
| 
 | 
   200 
 | 
| 
 | 
   201       first.rows = patient.fuzzy[first.match.filter,]
 | 
| 
 | 
   202       second.rows = patient.fuzzy[second.match.filter,]
 | 
| 
 | 
   203 
 | 
| 
50
 | 
   204       first.rows.v = table(first.rows$V_Segment_Major_Gene)
 | 
| 
 | 
   205       first.rows.v = names(first.rows.v[which.max(first.rows.v)])
 | 
| 
 | 
   206       first.rows.j = table(first.rows$J_Segment_Major_Gene)
 | 
| 
 | 
   207       first.rows.j = names(first.rows.j[which.max(first.rows.j)])
 | 
| 
 | 
   208 
 | 
| 
49
 | 
   209       first.sum = data.frame(merge = first.clone.sequence,
 | 
| 
 | 
   210                              Patient = patient,
 | 
| 
 | 
   211                              Receptor = first.rows[1,"Receptor"],
 | 
| 
 | 
   212                              Sample = first.rows[1,"Sample"],
 | 
| 
 | 
   213                              Cell_Count = first.rows[1,"Cell_Count"],
 | 
| 
 | 
   214                              Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
 | 
| 
 | 
   215                              Log10_Frequency = log10(sum(first.rows$Frequency)),
 | 
| 
 | 
   216                              Total_Read_Count = sum(first.rows$Total_Read_Count),
 | 
| 
 | 
   217                              dsPerM = sum(first.rows$dsPerM),
 | 
| 
50
 | 
   218                              J_Segment_Major_Gene = first.rows.j,
 | 
| 
 | 
   219                              V_Segment_Major_Gene = first.rows.v,
 | 
| 
49
 | 
   220                              Clone_Sequence = first.clone.sequence,
 | 
| 
 | 
   221                              CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
 | 
| 
 | 
   222                              Related_to_leukemia_clone = F,
 | 
| 
 | 
   223                              Frequency = sum(first.rows$Frequency),
 | 
| 
 | 
   224                              locus_V = first.rows[1,"locus_V"],
 | 
| 
 | 
   225                              locus_J = first.rows[1,"locus_J"],
 | 
| 
 | 
   226                              min_cell_count = first.rows[1,"min_cell_count"],
 | 
| 
 | 
   227                              normalized_read_count = sum(first.rows$normalized_read_count),
 | 
| 
 | 
   228                              paste = first.rows[1,"paste"],
 | 
| 
 | 
   229                              min_cell_paste = first.rows[1,"min_cell_paste"])
 | 
| 
 | 
   230 
 | 
| 
 | 
   231       if(nrow(second.rows) > 0){
 | 
| 
50
 | 
   232         second.rows.v = table(second.rows$V_Segment_Major_Gene)
 | 
| 
 | 
   233         second.rows.v = names(second.rows.v[which.max(second.rows.v)])
 | 
| 
 | 
   234         second.rows.j = table(second.rows$J_Segment_Major_Gene)
 | 
| 
 | 
   235         second.rows.j = names(second.rows.j[which.max(second.rows.j)])
 | 
| 
 | 
   236 
 | 
| 
55
 | 
   237         second.sum = data.frame(merge = first.clone.sequence,
 | 
| 
49
 | 
   238                                Patient = patient,
 | 
| 
 | 
   239                                Receptor = second.rows[1,"Receptor"],
 | 
| 
 | 
   240                                Sample = second.rows[1,"Sample"],
 | 
| 
 | 
   241                                Cell_Count = second.rows[1,"Cell_Count"],
 | 
| 
 | 
   242                                Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
 | 
| 
 | 
   243                                Log10_Frequency = log10(sum(second.rows$Frequency)),
 | 
| 
 | 
   244                                Total_Read_Count = sum(second.rows$Total_Read_Count),
 | 
| 
 | 
   245                                dsPerM = sum(second.rows$dsPerM),
 | 
| 
50
 | 
   246                                J_Segment_Major_Gene = second.rows.j,
 | 
| 
 | 
   247                                V_Segment_Major_Gene = second.rows.v,
 | 
| 
49
 | 
   248                                Clone_Sequence = first.clone.sequence,
 | 
| 
 | 
   249                                CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
 | 
| 
 | 
   250                                Related_to_leukemia_clone = F,
 | 
| 
 | 
   251                                Frequency = sum(second.rows$Frequency),
 | 
| 
 | 
   252                                locus_V = second.rows[1,"locus_V"],
 | 
| 
 | 
   253                                locus_J = second.rows[1,"locus_J"],
 | 
| 
 | 
   254                                min_cell_count = second.rows[1,"min_cell_count"],
 | 
| 
 | 
   255                                normalized_read_count = sum(second.rows$normalized_read_count),
 | 
| 
 | 
   256                                paste = second.rows[1,"paste"],
 | 
| 
 | 
   257                                min_cell_paste = second.rows[1,"min_cell_paste"])
 | 
| 
 | 
   258 
 | 
| 
 | 
   259         patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
 | 
| 
 | 
   260         patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
 | 
| 
 | 
   261 
 | 
| 
50
 | 
   262         hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
 | 
| 
 | 
   263         merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
 | 
| 
49
 | 
   264 
 | 
| 
56
 | 
   265         tmp.rows = rbind(first.rows, second.rows)
 | 
| 
 | 
   266         tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
 | 
| 
 | 
   267 
 | 
| 
 | 
   268         if (nrow(first.rows) > 1 | nrow(second.rows) > 1) {
 | 
| 
 | 
   269           cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
 | 
| 
 | 
   270         } else {
 | 
| 
 | 
   271           second.clone.sequence = second.rows[1,"Clone_Sequence"]
 | 
| 
 | 
   272           if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){
 | 
| 
 | 
   273             cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
 | 
| 
 | 
   274           } else {
 | 
| 
58
 | 
   275             #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
 | 
| 
49
 | 
   276           }
 | 
| 
 | 
   277         }
 | 
| 
 | 
   278 
 | 
| 
59
 | 
   279       } else if(nrow(first.rows) > 1) {
 | 
| 
 | 
   280         if(patient1[1,"Sample"] == first.sample){
 | 
| 
 | 
   281           patient1 = patient1[!(patient1$Clone_Sequence %in% first.rows$Clone_Sequence),]
 | 
| 
 | 
   282           patient1 = rbind(patient1, first.sum)
 | 
| 
 | 
   283         } else {
 | 
| 
 | 
   284           patient2 = patient2[!(patient2$Clone_Sequence %in% first.rows$Clone_Sequence),]
 | 
| 
 | 
   285           patient2 = rbind(patient2, first.sum)
 | 
| 
 | 
   286         }
 | 
| 
 | 
   287 
 | 
| 
 | 
   288         hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"])
 | 
| 
 | 
   289         merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
 | 
| 
 | 
   290 
 | 
| 
 | 
   291         patient.fuzzy = patient.fuzzy[-first.match.filter,]
 | 
| 
 | 
   292 
 | 
| 
 | 
   293         cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
 | 
| 
37
 | 
   294       } else {
 | 
| 
 | 
   295         patient.fuzzy = patient.fuzzy[-1,]
 | 
| 
34
 | 
   296       }
 | 
| 
 | 
   297     }
 | 
| 
51
 | 
   298     patient.merge.list[[patient]] <<- patientMerge
 | 
| 
 | 
   299     patient.merge.list.second[[patient]] <<- merge.list[["second"]]
 | 
| 
 | 
   300     cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
 | 
| 
34
 | 
   301   }
 | 
| 
51
 | 
   302 
 | 
| 
52
 | 
   303   patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
 | 
| 
 | 
   304   patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
 | 
| 
51
 | 
   305 
 | 
| 
37
 | 
   306   
 | 
| 
18
 | 
   307   patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
 | 
| 
0
 | 
   308   res1 = vector()
 | 
| 
 | 
   309   res2 = vector()
 | 
| 
 | 
   310   resBoth = vector()
 | 
| 
 | 
   311   read1Count = vector()
 | 
| 
 | 
   312   read2Count = vector()
 | 
| 
2
 | 
   313   locussum1 = vector()
 | 
| 
 | 
   314   locussum2 = vector()
 | 
| 
9
 | 
   315   
 | 
| 
0
 | 
   316   #for(iter in 1){
 | 
| 
 | 
   317   for(iter in 1:length(product[,1])){
 | 
| 
 | 
   318     threshhold = product[iter,threshholdIndex]
 | 
| 
 | 
   319     V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
 | 
| 
 | 
   320     J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
 | 
| 
18
 | 
   321     #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
 | 
| 
29
 | 
   322     both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
 | 
| 
30
 | 
   323     one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
 | 
| 
 | 
   324     two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
 | 
| 
14
 | 
   325     read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
 | 
| 
 | 
   326     read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
 | 
| 
0
 | 
   327     res1 = append(res1, sum(one))
 | 
| 
2
 | 
   328     res2 = append(res2, sum(two))
 | 
| 
0
 | 
   329     resBoth = append(resBoth, sum(both))
 | 
| 
2
 | 
   330     locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
 | 
| 
 | 
   331     locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
 | 
| 
0
 | 
   332     #threshhold = 0
 | 
| 
 | 
   333     if(threshhold != 0){
 | 
| 
 | 
   334       if(sum(one) > 0){
 | 
| 
15
 | 
   335         dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
 | 
| 
 | 
   336         colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
 | 
| 
0
 | 
   337         filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
 | 
| 
 | 
   338         write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
 | 
| 
 | 
   339       }
 | 
| 
 | 
   340       if(sum(two) > 0){
 | 
| 
15
 | 
   341         dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
 | 
| 
 | 
   342         colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
 | 
| 
0
 | 
   343         filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
 | 
| 
 | 
   344         write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
 | 
| 
 | 
   345       }
 | 
| 
29
 | 
   346     } else {
 | 
| 
 | 
   347       scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
 | 
| 
49
 | 
   348       #scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[[twoSample]]),]
 | 
| 
 | 
   349       scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
 | 
| 
29
 | 
   350       if(nrow(scatterplot_locus_data) > 0){
 | 
| 
 | 
   351         scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
 | 
| 
 | 
   352       }
 | 
| 
34
 | 
   353       in_one = (scatterplot_locus_data$merge %in% patient1$merge)
 | 
| 
 | 
   354       in_two = (scatterplot_locus_data$merge %in% patient2$merge)
 | 
| 
 | 
   355       if(any(in_two)){
 | 
| 
49
 | 
   356         scatterplot_locus_data[in_two,]$type = twoSample
 | 
| 
34
 | 
   357       }
 | 
| 
49
 | 
   358       in_both = (scatterplot_locus_data$merge %in% patientMerge$merge)
 | 
| 
 | 
   359       #merge.list.filter = (scatterplot_locus_data$merge %in% merge.list[[oneSample]])
 | 
| 
 | 
   360       #exact.matches.filter = (scatterplot_locus_data$merge %in% cs.exact.matches)
 | 
| 
30
 | 
   361       if(any(in_both)){
 | 
| 
 | 
   362         scatterplot_locus_data[in_both,]$type = "In Both"
 | 
| 
 | 
   363       }
 | 
| 
29
 | 
   364       if(type == "single"){
 | 
| 
 | 
   365         single_patients <<- rbind(single_patients, scatterplot_locus_data)
 | 
| 
 | 
   366       }
 | 
| 
 | 
   367       p = NULL
 | 
| 
 | 
   368       if(nrow(scatterplot_locus_data) != 0){
 | 
| 
 | 
   369         if(on == "normalized_read_count"){
 | 
| 
30
 | 
   370           scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
 | 
| 
49
 | 
   371           p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=10^6) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
 | 
| 
29
 | 
   372         } else {
 | 
| 
49
 | 
   373           p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
 | 
| 
29
 | 
   374         }
 | 
| 
 | 
   375         p = p + geom_point(aes(colour=type), position="jitter")
 | 
| 
 | 
   376         p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
 | 
| 
 | 
   377       } else {
 | 
| 
 | 
   378         p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
 | 
| 
 | 
   379       }
 | 
| 
 | 
   380       png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
 | 
| 
 | 
   381       print(p)
 | 
| 
 | 
   382       dev.off()
 | 
| 
0
 | 
   383     }
 | 
| 
 | 
   384     if(sum(both) > 0){
 | 
| 
15
 | 
   385       dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
 | 
| 
 | 
   386       colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
 | 
| 
0
 | 
   387       filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
 | 
| 
 | 
   388       write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
 | 
| 
29
 | 
   389     } 
 | 
| 
0
 | 
   390   }
 | 
| 
2
 | 
   391   patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
 | 
| 
0
 | 
   392   if(sum(is.na(patientResult$percentage)) > 0){
 | 
| 
 | 
   393     patientResult[is.na(patientResult$percentage),]$percentage = 0
 | 
| 
 | 
   394   }
 | 
| 
 | 
   395   colnames(patientResult)[6] = oneSample
 | 
| 
 | 
   396   colnames(patientResult)[8] = twoSample
 | 
| 
 | 
   397   colnamesBak = colnames(patientResult)
 | 
| 
2
 | 
   398   colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
 | 
| 
0
 | 
   399   write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
 | 
| 
 | 
   400   colnames(patientResult) = colnamesBak
 | 
| 
 | 
   401   
 | 
| 
 | 
   402   patientResult$Locus = factor(patientResult$Locus, Titles)
 | 
| 
 | 
   403   patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
 | 
| 
 | 
   404   
 | 
| 
 | 
   405   plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
 | 
| 
 | 
   406   plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
 | 
| 
 | 
   407   plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
 | 
| 
 | 
   408   plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
 | 
| 
 | 
   409   plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
 | 
| 
 | 
   410   plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
 | 
| 
 | 
   411   png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
 | 
| 
 | 
   412   print(plt)
 | 
| 
 | 
   413   dev.off()
 | 
| 
 | 
   414   #(t,r,b,l)
 | 
| 
 | 
   415   plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
 | 
| 
 | 
   416   plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
 | 
| 
 | 
   417   plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
 | 
| 
 | 
   418   plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
 | 
| 
 | 
   419   plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
 | 
| 
 | 
   420   plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
 | 
| 
 | 
   421   png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
 | 
| 
 | 
   422   print(plt)
 | 
| 
 | 
   423   dev.off()
 | 
| 
 | 
   424   
 | 
| 
 | 
   425   patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
 | 
| 
 | 
   426   patientResult$relativeValue = patientResult$value * 10
 | 
| 
 | 
   427   patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
 | 
| 
 | 
   428   plt = ggplot(patientResult)
 | 
| 
 | 
   429   plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
 | 
| 
 | 
   430   plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
 | 
| 
 | 
   431   plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
 | 
| 
 | 
   432   plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
 | 
| 
 | 
   433   plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
 | 
| 
 | 
   434   plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
 | 
| 
 | 
   435   png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
 | 
| 
 | 
   436   print(plt)
 | 
| 
 | 
   437   dev.off()
 | 
| 
 | 
   438 }
 | 
| 
 | 
   439 
 | 
| 
3
 | 
   440 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
 | 
| 
 | 
   441 
 | 
| 
0
 | 
   442 interval = intervalFreq
 | 
| 
 | 
   443 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
 | 
| 
4
 | 
   444 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
 | 
| 
61
 | 
   445 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
 | 
| 
0
 | 
   446 
 | 
| 
3
 | 
   447 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
 | 
| 
 | 
   448 
 | 
| 
0
 | 
   449 interval = intervalReads
 | 
| 
 | 
   450 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
 | 
| 
4
 | 
   451 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
 | 
| 
61
 | 
   452 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
 | 
| 
33
 | 
   453 
 | 
| 
 | 
   454 if(nrow(single_patients) > 0){
 | 
| 
 | 
   455 	scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
 | 
| 
 | 
   456 	p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
 | 
| 
 | 
   457 	p = p + geom_point(aes(colour=type), position="jitter")
 | 
| 
 | 
   458 	p = p + xlab("In one or both samples") + ylab("Reads")
 | 
| 
 | 
   459 	p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
 | 
| 
 | 
   460 	png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
 | 
| 
 | 
   461 	print(p)
 | 
| 
 | 
   462 	dev.off()
 | 
| 
7
 | 
   463 
 | 
| 
33
 | 
   464 	p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
 | 
| 
 | 
   465 	p = p + geom_point(aes(colour=type), position="jitter")
 | 
| 
 | 
   466 	p = p + xlab("In one or both samples") + ylab("Frequency")
 | 
| 
 | 
   467 	p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
 | 
| 
 | 
   468 	png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
 | 
| 
 | 
   469 	print(p)
 | 
| 
 | 
   470 	dev.off()
 | 
| 
 | 
   471 } else {
 | 
| 
 | 
   472 	empty <- data.frame()
 | 
| 
 | 
   473 	p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
 | 
| 
 | 
   474 	
 | 
| 
 | 
   475 	png("singles_reads_scatterplot.png", width=400, height=300)
 | 
| 
 | 
   476 	print(p)
 | 
| 
 | 
   477 	dev.off()	
 | 
| 
 | 
   478 	
 | 
| 
 | 
   479 	png("singles_freq_scatterplot.png", width=400, height=300)
 | 
| 
 | 
   480 	print(p)
 | 
| 
 | 
   481 	dev.off()
 | 
| 
 | 
   482 }
 | 
| 
60
 | 
   483 
 | 
| 
 | 
   484 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
 | 
| 
 | 
   485 patient.merge.list.second = list()
 | 
| 
 | 
   486 
 | 
| 
7
 | 
   487 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
 | 
| 
 | 
   488   onShort = "reads"
 | 
| 
 | 
   489   if(on == "Frequency"){
 | 
| 
 | 
   490     onShort = "freq"
 | 
| 
 | 
   491   }
 | 
| 
18
 | 
   492   onx = paste(on, ".x", sep="")
 | 
| 
 | 
   493   ony = paste(on, ".y", sep="")
 | 
| 
 | 
   494   onz = paste(on, ".z", sep="")
 | 
| 
7
 | 
   495   type="triplet"
 | 
| 
 | 
   496   
 | 
| 
 | 
   497   threshholdIndex = which(colnames(product) == "interval")
 | 
| 
 | 
   498   V_SegmentIndex = which(colnames(product) == "V_Segments")
 | 
| 
 | 
   499   J_SegmentIndex = which(colnames(product) == "J_Segments")
 | 
| 
 | 
   500   titleIndex = which(colnames(product) == "Titles")
 | 
| 
 | 
   501   sampleIndex = which(colnames(patient1) == "Sample")
 | 
| 
 | 
   502   patientIndex = which(colnames(patient1) == "Patient")
 | 
| 
 | 
   503   oneSample = paste(patient1[1,sampleIndex], sep="")
 | 
| 
 | 
   504   twoSample = paste(patient2[1,sampleIndex], sep="")
 | 
| 
 | 
   505   threeSample = paste(patient3[1,sampleIndex], sep="")
 | 
| 
60
 | 
   506 
 | 
| 
29
 | 
   507   if(mergeOn == "Clone_Sequence"){
 | 
| 
 | 
   508     patient1$merge = paste(patient1$Clone_Sequence)
 | 
| 
 | 
   509 		patient2$merge = paste(patient2$Clone_Sequence)
 | 
| 
 | 
   510 		patient3$merge = paste(patient3$Clone_Sequence)
 | 
| 
 | 
   511 
 | 
| 
 | 
   512   } else {
 | 
| 
 | 
   513 		patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
 | 
| 
 | 
   514 		patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
 | 
| 
 | 
   515 		patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
 | 
| 
 | 
   516   }
 | 
| 
60
 | 
   517 
 | 
| 
 | 
   518   #patientMerge = merge(patient1, patient2, by="merge")[NULL,]
 | 
| 
 | 
   519   patient1.fuzzy = patient1
 | 
| 
 | 
   520   patient2.fuzzy = patient2
 | 
| 
 | 
   521   patient3.fuzzy = patient3
 | 
| 
 | 
   522 
 | 
| 
 | 
   523   cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T)
 | 
| 
 | 
   524 
 | 
| 
 | 
   525   patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
 | 
| 
 | 
   526   patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
 | 
| 
 | 
   527   patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J)
 | 
| 
 | 
   528 
 | 
| 
 | 
   529   patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy)
 | 
| 
 | 
   530 
 | 
| 
 | 
   531   other.sample.list = list()
 | 
| 
 | 
   532   other.sample.list[[oneSample]] = c(twoSample, threeSample)
 | 
| 
 | 
   533   other.sample.list[[twoSample]] = c(oneSample, threeSample)
 | 
| 
 | 
   534   other.sample.list[[threeSample]] = c(oneSample, twoSample)
 | 
| 
 | 
   535 
 | 
| 
9
 | 
   536   patientMerge = merge(patient1, patient2, by="merge")
 | 
| 
 | 
   537   patientMerge = merge(patientMerge, patient3, by="merge")
 | 
| 
28
 | 
   538   colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
 | 
| 
60
 | 
   539   #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
 | 
| 
 | 
   540   patientMerge = patientMerge[NULL,]
 | 
| 
 | 
   541 
 | 
| 
 | 
   542   duo.merge.list = list()
 | 
| 
 | 
   543 
 | 
| 
20
 | 
   544   patientMerge12 = merge(patient1, patient2, by="merge")
 | 
| 
60
 | 
   545   #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
 | 
| 
 | 
   546   patientMerge12 = patientMerge12[NULL,]
 | 
| 
 | 
   547   duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12
 | 
| 
 | 
   548   duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12
 | 
| 
 | 
   549 
 | 
| 
20
 | 
   550   patientMerge13 = merge(patient1, patient3, by="merge")
 | 
| 
60
 | 
   551   #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
 | 
| 
 | 
   552   patientMerge13 = patientMerge13[NULL,]
 | 
| 
 | 
   553   duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13
 | 
| 
 | 
   554   duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13
 | 
| 
 | 
   555 
 | 
| 
20
 | 
   556   patientMerge23 = merge(patient2, patient3, by="merge")
 | 
| 
60
 | 
   557   #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
 | 
| 
 | 
   558   patientMerge23 = patientMerge23[NULL,]
 | 
| 
 | 
   559   duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23
 | 
| 
 | 
   560   duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23
 | 
| 
 | 
   561 
 | 
| 
 | 
   562   merge.list = list()
 | 
| 
 | 
   563   merge.list[["second"]] = vector()
 | 
| 
 | 
   564 
 | 
| 
 | 
   565   start.time = proc.time()
 | 
| 
 | 
   566   if(paste(label1, "123") %in% names(patient.merge.list)){
 | 
| 
 | 
   567     patientMerge = patient.merge.list[[paste(label1, "123")]]
 | 
| 
 | 
   568     patientMerge12 = patient.merge.list[[paste(label1, "12")]]
 | 
| 
 | 
   569     patientMerge13 = patient.merge.list[[paste(label1, "13")]]
 | 
| 
 | 
   570     patientMerge23 = patient.merge.list[[paste(label1, "23")]]
 | 
| 
 | 
   571 
 | 
| 
 | 
   572     merge.list[["second"]] = patient.merge.list.second[[label1]]
 | 
| 
 | 
   573 
 | 
| 
 | 
   574     cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
 | 
| 
 | 
   575   } else {
 | 
| 
 | 
   576     while(nrow(patient.fuzzy) > 0){
 | 
| 
 | 
   577       first.merge = patient.fuzzy[1,"merge"]
 | 
| 
 | 
   578       first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
 | 
| 
 | 
   579       first.sample = patient.fuzzy[1,"Sample"]
 | 
| 
 | 
   580 
 | 
| 
 | 
   581       merge.filter = first.merge == patient.fuzzy$merge
 | 
| 
 | 
   582 
 | 
| 
 | 
   583       second.sample = other.sample.list[[first.sample]][1]
 | 
| 
 | 
   584       third.sample = other.sample.list[[first.sample]][2]
 | 
| 
 | 
   585 
 | 
| 
 | 
   586       sample.filter.1 = first.sample == patient.fuzzy$Sample
 | 
| 
 | 
   587       sample.filter.2 = second.sample == patient.fuzzy$Sample
 | 
| 
 | 
   588       sample.filter.3 = third.sample == patient.fuzzy$Sample
 | 
| 
 | 
   589 
 | 
| 
 | 
   590       sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
 | 
| 
 | 
   591 
 | 
| 
 | 
   592       match.filter.1 = sample.filter.1 & sequence.filter & merge.filter
 | 
| 
 | 
   593       match.filter.2 = sample.filter.2 & sequence.filter & merge.filter
 | 
| 
 | 
   594       match.filter.3 = sample.filter.3 & sequence.filter & merge.filter
 | 
| 
 | 
   595 
 | 
| 
 | 
   596       matches.in.1 = any(match.filter.1)
 | 
| 
 | 
   597       matches.in.2 = any(match.filter.2)
 | 
| 
 | 
   598       matches.in.3 = any(match.filter.3)
 | 
| 
 | 
   599 
 | 
| 
 | 
   600 
 | 
| 
 | 
   601 
 | 
| 
 | 
   602       rows.1 = patient.fuzzy[match.filter.1,]
 | 
| 
 | 
   603 
 | 
| 
 | 
   604       sum.1 = data.frame(merge = first.clone.sequence,
 | 
| 
 | 
   605                          Patient = label1,
 | 
| 
 | 
   606                          Receptor = rows.1[1,"Receptor"],
 | 
| 
 | 
   607                          Sample = rows.1[1,"Sample"],
 | 
| 
 | 
   608                          Cell_Count = rows.1[1,"Cell_Count"],
 | 
| 
 | 
   609                          Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes),
 | 
| 
 | 
   610                          Log10_Frequency = log10(sum(rows.1$Frequency)),
 | 
| 
 | 
   611                          Total_Read_Count = sum(rows.1$Total_Read_Count),
 | 
| 
 | 
   612                          dsPerM = sum(rows.1$dsPerM),
 | 
| 
 | 
   613                          J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"],
 | 
| 
 | 
   614                          V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"],
 | 
| 
 | 
   615                          Clone_Sequence = first.clone.sequence,
 | 
| 
 | 
   616                          CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"],
 | 
| 
 | 
   617                          Related_to_leukemia_clone = F,
 | 
| 
 | 
   618                          Frequency = sum(rows.1$Frequency),
 | 
| 
 | 
   619                          locus_V = rows.1[1,"locus_V"],
 | 
| 
 | 
   620                          locus_J = rows.1[1,"locus_J"],
 | 
| 
 | 
   621                          normalized_read_count = sum(rows.1$normalized_read_count))
 | 
| 
 | 
   622       sum.2 = sum.1[NULL,]
 | 
| 
 | 
   623       rows.2 = patient.fuzzy[match.filter.2,]
 | 
| 
 | 
   624       if(matches.in.2){
 | 
| 
 | 
   625         sum.2 = data.frame(merge = first.clone.sequence,
 | 
| 
 | 
   626                            Patient = label1,
 | 
| 
 | 
   627                            Receptor = rows.2[1,"Receptor"],
 | 
| 
 | 
   628                            Sample = rows.2[1,"Sample"],
 | 
| 
 | 
   629                            Cell_Count = rows.2[1,"Cell_Count"],
 | 
| 
 | 
   630                            Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes),
 | 
| 
 | 
   631                            Log10_Frequency = log10(sum(rows.2$Frequency)),
 | 
| 
 | 
   632                            Total_Read_Count = sum(rows.2$Total_Read_Count),
 | 
| 
 | 
   633                            dsPerM = sum(rows.2$dsPerM),
 | 
| 
 | 
   634                            J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"],
 | 
| 
 | 
   635                            V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"],
 | 
| 
 | 
   636                            Clone_Sequence = first.clone.sequence,
 | 
| 
 | 
   637                            CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"],
 | 
| 
 | 
   638                            Related_to_leukemia_clone = F,
 | 
| 
 | 
   639                            Frequency = sum(rows.2$Frequency),
 | 
| 
 | 
   640                            locus_V = rows.2[1,"locus_V"],
 | 
| 
 | 
   641                            locus_J = rows.2[1,"locus_J"],
 | 
| 
 | 
   642                            normalized_read_count = sum(rows.2$normalized_read_count))
 | 
| 
 | 
   643       }
 | 
| 
 | 
   644 
 | 
| 
 | 
   645       sum.3 = sum.1[NULL,]
 | 
| 
 | 
   646       rows.3 = patient.fuzzy[match.filter.3,]
 | 
| 
 | 
   647       if(matches.in.3){
 | 
| 
 | 
   648         sum.3 = data.frame(merge = first.clone.sequence,
 | 
| 
 | 
   649                            Patient = label1,
 | 
| 
 | 
   650                            Receptor = rows.3[1,"Receptor"],
 | 
| 
 | 
   651                            Sample = rows.3[1,"Sample"],
 | 
| 
 | 
   652                            Cell_Count = rows.3[1,"Cell_Count"],
 | 
| 
 | 
   653                            Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes),
 | 
| 
 | 
   654                            Log10_Frequency = log10(sum(rows.3$Frequency)),
 | 
| 
 | 
   655                            Total_Read_Count = sum(rows.3$Total_Read_Count),
 | 
| 
 | 
   656                            dsPerM = sum(rows.3$dsPerM),
 | 
| 
 | 
   657                            J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"],
 | 
| 
 | 
   658                            V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"],
 | 
| 
 | 
   659                            Clone_Sequence = first.clone.sequence,
 | 
| 
 | 
   660                            CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"],
 | 
| 
 | 
   661                            Related_to_leukemia_clone = F,
 | 
| 
 | 
   662                            Frequency = sum(rows.3$Frequency),
 | 
| 
 | 
   663                            locus_V = rows.3[1,"locus_V"],
 | 
| 
 | 
   664                            locus_J = rows.3[1,"locus_J"],
 | 
| 
 | 
   665                            normalized_read_count = sum(rows.3$normalized_read_count))
 | 
| 
 | 
   666       }
 | 
| 
 | 
   667 
 | 
| 
 | 
   668       if(matches.in.2 & matches.in.3){
 | 
| 
 | 
   669         merge.123 = merge(sum.1, sum.2, by="merge")
 | 
| 
 | 
   670         merge.123 = merge(merge.123, sum.3, by="merge")
 | 
| 
 | 
   671         colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="")
 | 
| 
 | 
   672         #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz])
 | 
| 
 | 
   673 
 | 
| 
 | 
   674         patientMerge = rbind(patientMerge, merge.123)
 | 
| 
 | 
   675         patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),]
 | 
| 
 | 
   676 
 | 
| 
 | 
   677         hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
 | 
| 
 | 
   678         merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
 | 
| 
 | 
   679 
 | 
| 
 | 
   680       } else if (matches.in.2) {
 | 
| 
 | 
   681         #other.sample1 = other.sample.list[[first.sample]][1]
 | 
| 
 | 
   682         #other.sample2 = other.sample.list[[first.sample]][2]
 | 
| 
 | 
   683 
 | 
| 
 | 
   684         second.sample = sum.2[,"Sample"]
 | 
| 
 | 
   685 
 | 
| 
 | 
   686         current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
 | 
| 
 | 
   687 
 | 
| 
 | 
   688         merge.12 = merge(sum.1, sum.2, by="merge")
 | 
| 
 | 
   689 
 | 
| 
 | 
   690         current.merge.list = rbind(current.merge.list, merge.12)
 | 
| 
 | 
   691         duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
 | 
| 
 | 
   692 
 | 
| 
 | 
   693         patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),]
 | 
| 
 | 
   694 
 | 
| 
 | 
   695         hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
 | 
| 
 | 
   696         merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
 | 
| 
 | 
   697 
 | 
| 
 | 
   698       } else if (matches.in.3) {
 | 
| 
 | 
   699 
 | 
| 
 | 
   700         #other.sample1 = other.sample.list[[first.sample]][1]
 | 
| 
 | 
   701         #other.sample2 = other.sample.list[[first.sample]][2]
 | 
| 
 | 
   702 
 | 
| 
 | 
   703         second.sample = sum.3[,"Sample"]
 | 
| 
 | 
   704 
 | 
| 
 | 
   705         current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
 | 
| 
 | 
   706 
 | 
| 
 | 
   707         merge.13 = merge(sum.1, sum.3, by="merge")
 | 
| 
 | 
   708 
 | 
| 
 | 
   709         current.merge.list = rbind(current.merge.list, merge.13)
 | 
| 
 | 
   710         duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
 | 
| 
 | 
   711 
 | 
| 
 | 
   712         patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),]
 | 
| 
 | 
   713 
 | 
| 
 | 
   714         hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
 | 
| 
 | 
   715         merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
 | 
| 
 | 
   716 
 | 
| 
 | 
   717       } else {
 | 
| 
 | 
   718         patient.fuzzy = patient.fuzzy[-1,]
 | 
| 
 | 
   719       }
 | 
| 
 | 
   720 
 | 
| 
 | 
   721       tmp.rows = rbind(rows.1, rows.2, rows.3)
 | 
| 
 | 
   722       tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
 | 
| 
 | 
   723 
 | 
| 
 | 
   724       if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) {
 | 
| 
 | 
   725         cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
 | 
| 
 | 
   726       } else {
 | 
| 
 | 
   727       }
 | 
| 
 | 
   728 
 | 
| 
 | 
   729     }
 | 
| 
 | 
   730     patient.merge.list[[paste(label1, "123")]] = patientMerge
 | 
| 
 | 
   731 
 | 
| 
 | 
   732     patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]]
 | 
| 
 | 
   733     patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]]
 | 
| 
 | 
   734     patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]]
 | 
| 
 | 
   735 
 | 
| 
 | 
   736     patient.merge.list[[paste(label1, "12")]] = patientMerge12
 | 
| 
 | 
   737     patient.merge.list[[paste(label1, "13")]] = patientMerge13
 | 
| 
 | 
   738     patient.merge.list[[paste(label1, "23")]] = patientMerge23
 | 
| 
 | 
   739 
 | 
| 
 | 
   740     patient.merge.list.second[[label1]] = merge.list[["second"]]
 | 
| 
 | 
   741   }
 | 
| 
 | 
   742   cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
 | 
| 
 | 
   743   patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
 | 
| 
 | 
   744   patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
 | 
| 
 | 
   745   patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
 | 
| 
20
 | 
   746   patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
 | 
| 
60
 | 
   747 
 | 
| 
 | 
   748   if(F){
 | 
| 
 | 
   749     patientMerge = merge(patient1, patient2, by="merge")
 | 
| 
 | 
   750     patientMerge = merge(patientMerge, patient3, by="merge")
 | 
| 
 | 
   751     colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
 | 
| 
 | 
   752     patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
 | 
| 
 | 
   753     patientMerge12 = merge(patient1, patient2, by="merge")
 | 
| 
 | 
   754     patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
 | 
| 
 | 
   755     patientMerge13 = merge(patient1, patient3, by="merge")
 | 
| 
 | 
   756     patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
 | 
| 
 | 
   757     patientMerge23 = merge(patient2, patient3, by="merge")
 | 
| 
 | 
   758     patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
 | 
| 
 | 
   759   }
 | 
| 
28
 | 
   760   
 | 
| 
30
 | 
   761   scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
 | 
| 
20
 | 
   762   scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
 | 
| 
30
 | 
   763   scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
 | 
| 
27
 | 
   764   scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
 | 
| 
20
 | 
   765   
 | 
| 
7
 | 
   766   res1 = vector()
 | 
| 
 | 
   767   res2 = vector()
 | 
| 
 | 
   768   res3 = vector()
 | 
| 
20
 | 
   769   res12 = vector()
 | 
| 
 | 
   770   res13 = vector()
 | 
| 
 | 
   771   res23 = vector()
 | 
| 
7
 | 
   772   resAll = vector()
 | 
| 
 | 
   773   read1Count = vector()
 | 
| 
 | 
   774   read2Count = vector()
 | 
| 
 | 
   775   read3Count = vector()
 | 
| 
 | 
   776   
 | 
| 
 | 
   777   if(appendTriplets){
 | 
| 
9
 | 
   778     cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
 | 
| 
7
 | 
   779   }
 | 
| 
 | 
   780   for(iter in 1:length(product[,1])){
 | 
| 
 | 
   781     threshhold = product[iter,threshholdIndex]
 | 
| 
 | 
   782     V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
 | 
| 
 | 
   783     J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
 | 
| 
18
 | 
   784     #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold) 
 | 
| 
 | 
   785     all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
 | 
| 
7
 | 
   786     
 | 
| 
30
 | 
   787     one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
 | 
| 
 | 
   788     one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
 | 
| 
 | 
   789     two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
 | 
| 
24
 | 
   790     
 | 
| 
30
 | 
   791     one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
 | 
| 
 | 
   792     two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
 | 
| 
 | 
   793     three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
 | 
| 
20
 | 
   794     
 | 
| 
18
 | 
   795     read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
 | 
| 
 | 
   796     read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
 | 
| 
 | 
   797     read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
 | 
| 
7
 | 
   798     res1 = append(res1, sum(one))
 | 
| 
 | 
   799     res2 = append(res2, sum(two))
 | 
| 
 | 
   800     res3 = append(res3, sum(three))
 | 
| 
 | 
   801     resAll = append(resAll, sum(all))
 | 
| 
20
 | 
   802     res12 = append(res12, sum(one_two))
 | 
| 
 | 
   803     res13 = append(res13, sum(one_three))
 | 
| 
 | 
   804     res23 = append(res23, sum(two_three))
 | 
| 
7
 | 
   805     #threshhold = 0
 | 
| 
 | 
   806     if(threshhold != 0){
 | 
| 
 | 
   807       if(sum(one) > 0){
 | 
| 
20
 | 
   808         dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
 | 
| 
 | 
   809         colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
 | 
| 
7
 | 
   810         filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
 | 
| 
 | 
   811         write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
 | 
| 
 | 
   812       }
 | 
| 
 | 
   813       if(sum(two) > 0){
 | 
| 
20
 | 
   814         dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
 | 
| 
 | 
   815         colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
 | 
| 
7
 | 
   816         filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
 | 
| 
 | 
   817         write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
 | 
| 
 | 
   818       }
 | 
| 
 | 
   819       if(sum(three) > 0){
 | 
| 
20
 | 
   820         dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
 | 
| 
 | 
   821         colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
 | 
| 
7
 | 
   822         filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
 | 
| 
 | 
   823         write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
 | 
| 
 | 
   824       }
 | 
| 
20
 | 
   825       if(sum(one_two) > 0){
 | 
| 
 | 
   826         dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
 | 
| 
 | 
   827         colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
 | 
| 
 | 
   828         filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
 | 
| 
 | 
   829         write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
 | 
| 
 | 
   830       }
 | 
| 
 | 
   831       if(sum(one_three) > 0){
 | 
| 
 | 
   832         dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
 | 
| 
 | 
   833         colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
 | 
| 
 | 
   834         filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
 | 
| 
 | 
   835         write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
 | 
| 
 | 
   836       }
 | 
| 
 | 
   837       if(sum(two_three) > 0){
 | 
| 
 | 
   838         dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
 | 
| 
 | 
   839         colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
 | 
| 
 | 
   840         filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
 | 
| 
 | 
   841         write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
 | 
| 
 | 
   842       }
 | 
| 
 | 
   843     } else { #scatterplot data
 | 
| 
24
 | 
   844       scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
 | 
| 
60
 | 
   845       scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
 | 
| 
30
 | 
   846       in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
 | 
| 
27
 | 
   847       if(sum(in_two) > 0){
 | 
| 
 | 
   848 				scatterplot_locus_data[in_two,]$type = "In two"
 | 
| 
 | 
   849       }
 | 
| 
30
 | 
   850       in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
 | 
| 
27
 | 
   851       if(sum(in_three)> 0){
 | 
| 
 | 
   852 				scatterplot_locus_data[in_three,]$type = "In three"
 | 
| 
 | 
   853       }
 | 
| 
 | 
   854       not_in_one = scatterplot_locus_data$type != "In one"
 | 
| 
 | 
   855       if(sum(not_in_one) > 0){
 | 
| 
 | 
   856 				scatterplot_locus_data[not_in_one,]$type = "In multiple"
 | 
| 
 | 
   857       }
 | 
| 
20
 | 
   858       p = NULL
 | 
| 
 | 
   859       if(nrow(scatterplot_locus_data) != 0){
 | 
| 
 | 
   860         if(on == "normalized_read_count"){
 | 
| 
31
 | 
   861 					scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
 | 
| 
32
 | 
   862           p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
 | 
| 
20
 | 
   863         } else {
 | 
| 
32
 | 
   864           p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
 | 
| 
20
 | 
   865         }
 | 
| 
 | 
   866         p = p + geom_point(aes(colour=type), position="jitter")
 | 
| 
25
 | 
   867         p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
 | 
| 
20
 | 
   868       } else {
 | 
| 
 | 
   869         p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
 | 
| 
 | 
   870       }
 | 
| 
 | 
   871       png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
 | 
| 
 | 
   872       print(p)
 | 
| 
 | 
   873       dev.off()
 | 
| 
 | 
   874     } 
 | 
| 
7
 | 
   875     if(sum(all) > 0){
 | 
| 
20
 | 
   876       dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
 | 
| 
 | 
   877       colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
 | 
| 
7
 | 
   878       filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
 | 
| 
 | 
   879       write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
 | 
| 
 | 
   880     }
 | 
| 
 | 
   881   }
 | 
| 
20
 | 
   882   #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
 | 
| 
 | 
   883   patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
 | 
| 
7
 | 
   884   colnames(patientResult)[6] = oneSample
 | 
| 
20
 | 
   885   colnames(patientResult)[7] = twoSample
 | 
| 
 | 
   886   colnames(patientResult)[8] = threeSample
 | 
| 
 | 
   887   colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
 | 
| 
 | 
   888   colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
 | 
| 
 | 
   889   colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
 | 
| 
7
 | 
   890   
 | 
| 
 | 
   891   colnamesBak = colnames(patientResult)
 | 
| 
20
 | 
   892   colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
 | 
| 
7
 | 
   893   write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
 | 
| 
 | 
   894   colnames(patientResult) = colnamesBak
 | 
| 
 | 
   895   
 | 
| 
 | 
   896   patientResult$Locus = factor(patientResult$Locus, Titles)
 | 
| 
 | 
   897   patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
 | 
| 
 | 
   898   
 | 
| 
 | 
   899   plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
 | 
| 
 | 
   900   plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
 | 
| 
 | 
   901   plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
 | 
| 
 | 
   902   plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
 | 
| 
 | 
   903   plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
 | 
| 
 | 
   904   plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
 | 
| 
 | 
   905   png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
 | 
| 
 | 
   906   print(plt)
 | 
| 
 | 
   907   dev.off()
 | 
| 
 | 
   908   
 | 
| 
 | 
   909   fontSize = 4
 | 
| 
 | 
   910   
 | 
| 
 | 
   911   bak = patientResult
 | 
| 
 | 
   912   patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
 | 
| 
 | 
   913   patientResult$relativeValue = patientResult$value * 10
 | 
| 
 | 
   914   patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
 | 
| 
 | 
   915   plt = ggplot(patientResult)
 | 
| 
 | 
   916   plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
 | 
| 
 | 
   917   plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
 | 
| 
 | 
   918   plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
 | 
| 
 | 
   919   plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
 | 
| 
 | 
   920   plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
 | 
| 
 | 
   921   plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
 | 
| 
 | 
   922   plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
 | 
| 
 | 
   923   png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
 | 
| 
 | 
   924   print(plt)
 | 
| 
 | 
   925   dev.off()
 | 
| 
 | 
   926 }
 | 
| 
 | 
   927 
 | 
| 
33
 | 
   928 if(nrow(triplets) != 0){
 | 
| 
60
 | 
   929 
 | 
| 
 | 
   930   cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T)
 | 
| 
 | 
   931 
 | 
| 
33
 | 
   932   triplets$uniqueID = "ID"
 | 
| 
 | 
   933   
 | 
| 
 | 
   934   triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
 | 
| 
 | 
   935   triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
 | 
| 
 | 
   936   triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
 | 
| 
 | 
   937   
 | 
| 
 | 
   938   triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
 | 
| 
 | 
   939   triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
 | 
| 
 | 
   940   triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
 | 
| 
 | 
   941   
 | 
| 
 | 
   942   triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
 | 
| 
60
 | 
   943 
 | 
| 
 | 
   944   cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
 | 
| 
 | 
   945 
 | 
| 
33
 | 
   946   triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
 | 
| 
 | 
   947   triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
 | 
| 
 | 
   948   min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
 | 
| 
 | 
   949   
 | 
| 
 | 
   950   triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
 | 
| 
 | 
   951   min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
 | 
| 
 | 
   952   
 | 
| 
 | 
   953   min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
 | 
| 
 | 
   954   
 | 
| 
 | 
   955   triplets = merge(triplets, min_cell_count, by="min_cell_paste")
 | 
| 
 | 
   956   
 | 
| 
 | 
   957   triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
 | 
| 
 | 
   958   
 | 
| 
 | 
   959   triplets = triplets[triplets$normalized_read_count >= min_cells,]
 | 
| 
 | 
   960   
 | 
| 
60
 | 
   961   column_drops = c("min_cell_count", "min_cell_paste")
 | 
| 
33
 | 
   962   
 | 
| 
 | 
   963   triplets = triplets[,!(colnames(triplets) %in% column_drops)]
 | 
| 
60
 | 
   964 
 | 
| 
 | 
   965   cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
 | 
| 
 | 
   966 
 | 
| 
33
 | 
   967   interval = intervalReads
 | 
| 
 | 
   968   intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
 | 
| 
 | 
   969   product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
 | 
| 
 | 
   970   
 | 
| 
 | 
   971   one = triplets[triplets$Sample == "14696_reg_BM",]
 | 
| 
 | 
   972   two = triplets[triplets$Sample == "24536_reg_BM",]
 | 
| 
 | 
   973   three = triplets[triplets$Sample == "24062_reg_BM",]
 | 
| 
61
 | 
   974   tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T)
 | 
| 
33
 | 
   975   
 | 
| 
 | 
   976   one = triplets[triplets$Sample == "16278_Left",]
 | 
| 
 | 
   977   two = triplets[triplets$Sample == "26402_Left",]
 | 
| 
 | 
   978   three = triplets[triplets$Sample == "26759_Left",]
 | 
| 
61
 | 
   979   tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T)
 | 
| 
33
 | 
   980   
 | 
| 
 | 
   981   one = triplets[triplets$Sample == "16278_Right",]
 | 
| 
 | 
   982   two = triplets[triplets$Sample == "26402_Right",]
 | 
| 
 | 
   983   three = triplets[triplets$Sample == "26759_Right",]
 | 
| 
53
 | 
   984   tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T)
 | 
| 
33
 | 
   985   
 | 
| 
60
 | 
   986   cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
 | 
| 
 | 
   987 
 | 
| 
33
 | 
   988   interval = intervalFreq
 | 
| 
 | 
   989   intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
 | 
| 
 | 
   990   product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
 | 
| 
 | 
   991   
 | 
| 
 | 
   992   one = triplets[triplets$Sample == "14696_reg_BM",]
 | 
| 
 | 
   993   two = triplets[triplets$Sample == "24536_reg_BM",]
 | 
| 
 | 
   994   three = triplets[triplets$Sample == "24062_reg_BM",]
 | 
| 
61
 | 
   995   tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F)
 | 
| 
33
 | 
   996   
 | 
| 
 | 
   997   one = triplets[triplets$Sample == "16278_Left",]
 | 
| 
 | 
   998   two = triplets[triplets$Sample == "26402_Left",]
 | 
| 
 | 
   999   three = triplets[triplets$Sample == "26759_Left",]
 | 
| 
61
 | 
  1000   tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F)
 | 
| 
33
 | 
  1001   
 | 
| 
 | 
  1002   one = triplets[triplets$Sample == "16278_Right",]
 | 
| 
 | 
  1003   two = triplets[triplets$Sample == "26402_Right",]
 | 
| 
 | 
  1004   three = triplets[triplets$Sample == "26759_Right",]
 | 
| 
53
 | 
  1005   tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F)
 | 
| 
33
 | 
  1006 } else {
 | 
| 
 | 
  1007   cat("", file="triplets.txt")
 | 
| 
 | 
  1008 }
 | 
| 
60
 | 
  1009 cat("</table></html>", file=logfile, append=T) |