annotate RScript.txt @ 60:28c3695259c1 draft

Uploaded
author davidvanzessen
date Thu, 29 Oct 2015 11:21:33 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
60
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
2
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
3 inFile = args[1]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
4 outDir = args[2]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
5 logfile = args[3]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
6 min_freq = as.numeric(args[4])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
7 min_cells = as.numeric(args[5])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
8 mergeOn = args[6]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
9
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
10 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
11
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
12 library(ggplot2)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
13 library(reshape2)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
14 library(data.table)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
15 library(grid)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
16 library(parallel)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
17 #require(xtable)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
18 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
19 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
20 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Related_to_leukemia_clone", "Clone_Sequence")]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
21 dat$dsPerM = 0
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
22 dat = dat[!is.na(dat$Patient),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
23 dat$Related_to_leukemia_clone = F
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
24
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
25 setwd(outDir)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
26 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
27 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
28 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
29
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
30 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
31
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
32 dat$Frequency = ((10^dat$Log10_Frequency)*100)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
33
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
34 dat = dat[dat$Frequency >= min_freq,]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
35
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
36 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
37
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
38 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
39
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
40 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
41 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
42 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
43
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
44 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
45 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
46
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
47 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
48 print(paste("rows:", nrow(dat)))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
49 dat = merge(dat, min_cell_count, by="min_cell_paste")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
50 print(paste("rows:", nrow(dat)))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
51 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
52
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
53 dat = dat[dat$normalized_read_count >= min_cells,]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
54
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
55 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
56
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
57 patients = split(dat, dat$Patient, drop=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
58 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
59 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
60 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
61 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
62 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
63 Titles = factor(Titles, levels=Titles)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
64 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
65
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
66 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
67
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
68 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
69 patient.merge.list.second = list()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
70 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
71 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
72 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
73 if (!is.data.frame(x) & is.list(x)){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
74 x = x[[1]]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
75 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
76 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
77 x = data.frame(x,stringsAsFactors=F)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
78 onShort = "reads"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
79 if(on == "Frequency"){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
80 onShort = "freq"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
81 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
82 onx = paste(on, ".x", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
83 ony = paste(on, ".y", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
84 splt = split(x, x$Sample, drop=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
85 type="pair"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
86 if(length(splt) == 1){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
87 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
88 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
89 type="single"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
90 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
91 patient1 = splt[[1]]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
92 patient2 = splt[[2]]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
93
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
94 threshholdIndex = which(colnames(product) == "interval")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
95 V_SegmentIndex = which(colnames(product) == "V_Segments")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
96 J_SegmentIndex = which(colnames(product) == "J_Segments")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
97 titleIndex = which(colnames(product) == "Titles")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
98 sampleIndex = which(colnames(x) == "Sample")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
99 patientIndex = which(colnames(x) == "Patient")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
100 oneSample = paste(patient1[1,sampleIndex], sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
101 twoSample = paste(patient2[1,sampleIndex], sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
102 patient = paste(x[1,patientIndex])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
103
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
104 switched = F
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
105 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
106 tmp = twoSample
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
107 twoSample = oneSample
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
108 oneSample = tmp
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
109 tmp = patient1
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
110 patient1 = patient2
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
111 patient2 = tmp
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
112 switched = T
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
113 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
114 if(appendtxt){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
115 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
116 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
117 cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
118
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
119 if(mergeOn == "Clone_Sequence"){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
120 patient1$merge = paste(patient1$Clone_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
121 patient2$merge = paste(patient2$Clone_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
122 } else {
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
123 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
124 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
125 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
126
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
127 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
128 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
129 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
130 scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
131 scatterplot_data$on = onShort
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
132
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
133 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy'
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
134 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
135
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
136 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
137
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
138 start.time = proc.time()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
139 merge.list = c()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
140
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
141 if(patient %in% names(patient.merge.list)){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
142 patientMerge = patient.merge.list[[patient]]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
143 merge.list[["second"]] = patient.merge.list.second[[patient]]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
144 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
145
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
146 print(names(patient.merge.list))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
147 } else {
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
148 #fuzzy matching here...
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
149 #merge.list = patientMerge$merge
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
150
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
151 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
152 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
153
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
154 patient1.fuzzy = patient1
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
155 patient2.fuzzy = patient2
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
156
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
157 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
158 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
159
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
160 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
161 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
162
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
163 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
164 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
165
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
166 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove?
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
167 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
168
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
169 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
170 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
171
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
172 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
173 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
174
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
175 merge.list = list()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
176
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
177 merge.list[["second"]] = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
178
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
179 while(nrow(patient.fuzzy) > 1){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
180 first.merge = patient.fuzzy[1,"merge"]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
181 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
182 first.sample = patient.fuzzy[1,"Sample"]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
183 merge.filter = first.merge == patient.fuzzy$merge
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
184
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
185 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
186
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
187 first.sample.filter = first.sample == patient.fuzzy$Sample
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
188 second.sample.filter = first.sample != patient.fuzzy$Sample
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
189
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
190 #first match same sample, sum to a single row, same for other sample
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
191 #then merge rows like 'normal'
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
192
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
193 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
194
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
195
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
196
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
197 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
198 first.match.filter = merge.filter & sequence.filter & first.sample.filter
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
199 second.match.filter = merge.filter & sequence.filter & second.sample.filter
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
200
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
201 first.rows = patient.fuzzy[first.match.filter,]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
202 second.rows = patient.fuzzy[second.match.filter,]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
203
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
204 first.rows.v = table(first.rows$V_Segment_Major_Gene)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
205 first.rows.v = names(first.rows.v[which.max(first.rows.v)])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
206 first.rows.j = table(first.rows$J_Segment_Major_Gene)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
207 first.rows.j = names(first.rows.j[which.max(first.rows.j)])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
208
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
209 first.sum = data.frame(merge = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
210 Patient = patient,
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
211 Receptor = first.rows[1,"Receptor"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
212 Sample = first.rows[1,"Sample"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
213 Cell_Count = first.rows[1,"Cell_Count"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
214 Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
215 Log10_Frequency = log10(sum(first.rows$Frequency)),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
216 Total_Read_Count = sum(first.rows$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
217 dsPerM = sum(first.rows$dsPerM),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
218 J_Segment_Major_Gene = first.rows.j,
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
219 V_Segment_Major_Gene = first.rows.v,
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
220 Clone_Sequence = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
221 CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
222 Related_to_leukemia_clone = F,
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
223 Frequency = sum(first.rows$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
224 locus_V = first.rows[1,"locus_V"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
225 locus_J = first.rows[1,"locus_J"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
226 min_cell_count = first.rows[1,"min_cell_count"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
227 normalized_read_count = sum(first.rows$normalized_read_count),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
228 paste = first.rows[1,"paste"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
229 min_cell_paste = first.rows[1,"min_cell_paste"])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
230
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
231 if(nrow(second.rows) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
232 second.rows.v = table(second.rows$V_Segment_Major_Gene)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
233 second.rows.v = names(second.rows.v[which.max(second.rows.v)])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
234 second.rows.j = table(second.rows$J_Segment_Major_Gene)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
235 second.rows.j = names(second.rows.j[which.max(second.rows.j)])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
236
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
237 second.sum = data.frame(merge = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
238 Patient = patient,
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
239 Receptor = second.rows[1,"Receptor"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
240 Sample = second.rows[1,"Sample"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
241 Cell_Count = second.rows[1,"Cell_Count"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
242 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
243 Log10_Frequency = log10(sum(second.rows$Frequency)),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
244 Total_Read_Count = sum(second.rows$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
245 dsPerM = sum(second.rows$dsPerM),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
246 J_Segment_Major_Gene = second.rows.j,
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
247 V_Segment_Major_Gene = second.rows.v,
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
248 Clone_Sequence = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
249 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
250 Related_to_leukemia_clone = F,
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
251 Frequency = sum(second.rows$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
252 locus_V = second.rows[1,"locus_V"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
253 locus_J = second.rows[1,"locus_J"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
254 min_cell_count = second.rows[1,"min_cell_count"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
255 normalized_read_count = sum(second.rows$normalized_read_count),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
256 paste = second.rows[1,"paste"],
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
257 min_cell_paste = second.rows[1,"min_cell_paste"])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
258
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
259 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
260 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
261
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
262 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
263 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
264
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
265 tmp.rows = rbind(first.rows, second.rows)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
266 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
267
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
268 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) {
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
269 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
270 } else {
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
271 second.clone.sequence = second.rows[1,"Clone_Sequence"]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
272 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
273 cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
274 } else {
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
275 #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
276 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
277 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
278
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
279 } else {
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
280 patient.fuzzy = patient.fuzzy[-1,]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
281 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
282 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
283 patient.merge.list[[patient]] <<- patientMerge
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
284 patient.merge.list.second[[patient]] <<- merge.list[["second"]]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
285 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
286 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
287
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
288 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
289 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
290
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
291
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
292 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
293 res1 = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
294 res2 = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
295 resBoth = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
296 read1Count = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
297 read2Count = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
298 locussum1 = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
299 locussum2 = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
300
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
301 #for(iter in 1){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
302 for(iter in 1:length(product[,1])){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
303 threshhold = product[iter,threshholdIndex]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
304 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
305 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
306 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
307 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
308 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
309 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
310 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
311 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
312 res1 = append(res1, sum(one))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
313 res2 = append(res2, sum(two))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
314 resBoth = append(resBoth, sum(both))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
315 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
316 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
317 #threshhold = 0
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
318 if(threshhold != 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
319 if(sum(one) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
320 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
321 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
322 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
323 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
324 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
325 if(sum(two) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
326 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
327 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
328 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
329 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
330 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
331 } else {
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
332 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
333 #scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[[twoSample]]),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
334 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
335 if(nrow(scatterplot_locus_data) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
336 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
337 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
338 in_one = (scatterplot_locus_data$merge %in% patient1$merge)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
339 in_two = (scatterplot_locus_data$merge %in% patient2$merge)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
340 if(any(in_two)){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
341 scatterplot_locus_data[in_two,]$type = twoSample
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
342 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
343 in_both = (scatterplot_locus_data$merge %in% patientMerge$merge)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
344 #merge.list.filter = (scatterplot_locus_data$merge %in% merge.list[[oneSample]])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
345 #exact.matches.filter = (scatterplot_locus_data$merge %in% cs.exact.matches)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
346 if(any(in_both)){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
347 scatterplot_locus_data[in_both,]$type = "In Both"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
348 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
349 if(type == "single"){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
350 single_patients <<- rbind(single_patients, scatterplot_locus_data)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
351 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
352 p = NULL
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
353 if(nrow(scatterplot_locus_data) != 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
354 if(on == "normalized_read_count"){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
355 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
356 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=10^6) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
357 } else {
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
358 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
359 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
360 p = p + geom_point(aes(colour=type), position="jitter")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
361 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
362 } else {
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
363 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
364 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
365 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
366 print(p)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
367 dev.off()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
368 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
369 if(sum(both) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
370 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
371 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
372 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
373 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
374 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
375 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
376 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
377 if(sum(is.na(patientResult$percentage)) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
378 patientResult[is.na(patientResult$percentage),]$percentage = 0
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
379 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
380 colnames(patientResult)[6] = oneSample
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
381 colnames(patientResult)[8] = twoSample
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
382 colnamesBak = colnames(patientResult)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
383 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
384 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
385 colnames(patientResult) = colnamesBak
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
386
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
387 patientResult$Locus = factor(patientResult$Locus, Titles)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
388 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
389
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
390 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
391 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
392 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
393 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
394 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
395 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
396 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
397 print(plt)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
398 dev.off()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
399 #(t,r,b,l)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
400 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
401 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
402 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
403 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
404 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
405 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
406 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
407 print(plt)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
408 dev.off()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
409
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
410 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
411 patientResult$relativeValue = patientResult$value * 10
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
412 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
413 plt = ggplot(patientResult)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
414 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
415 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
416 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
417 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
418 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
419 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
420 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
421 print(plt)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
422 dev.off()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
423 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
424
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
425 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
426
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
427 interval = intervalFreq
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
428 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
429 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
430 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
431
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
432 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
433
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
434 interval = intervalReads
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
435 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
436 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
437 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
438
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
439 cat("</table></html>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
440
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
441
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
442 if(nrow(single_patients) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
443 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
444 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
445 p = p + geom_point(aes(colour=type), position="jitter")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
446 p = p + xlab("In one or both samples") + ylab("Reads")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
447 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
448 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
449 print(p)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
450 dev.off()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
451
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
452 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
453 p = p + geom_point(aes(colour=type), position="jitter")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
454 p = p + xlab("In one or both samples") + ylab("Frequency")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
455 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
456 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
457 print(p)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
458 dev.off()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
459 } else {
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
460 empty <- data.frame()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
461 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
462
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
463 png("singles_reads_scatterplot.png", width=400, height=300)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
464 print(p)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
465 dev.off()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
466
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
467 png("singles_freq_scatterplot.png", width=400, height=300)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
468 print(p)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
469 dev.off()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
470 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
471 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
472 onShort = "reads"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
473 if(on == "Frequency"){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
474 onShort = "freq"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
475 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
476 onx = paste(on, ".x", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
477 ony = paste(on, ".y", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
478 onz = paste(on, ".z", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
479 type="triplet"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
480
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
481 threshholdIndex = which(colnames(product) == "interval")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
482 V_SegmentIndex = which(colnames(product) == "V_Segments")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
483 J_SegmentIndex = which(colnames(product) == "J_Segments")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
484 titleIndex = which(colnames(product) == "Titles")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
485 sampleIndex = which(colnames(patient1) == "Sample")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
486 patientIndex = which(colnames(patient1) == "Patient")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
487 oneSample = paste(patient1[1,sampleIndex], sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
488 twoSample = paste(patient2[1,sampleIndex], sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
489 threeSample = paste(patient3[1,sampleIndex], sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
490
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
491 if(mergeOn == "Clone_Sequence"){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
492 patient1$merge = paste(patient1$Clone_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
493 patient2$merge = paste(patient2$Clone_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
494 patient3$merge = paste(patient3$Clone_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
495
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
496 } else {
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
497 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
498 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
499 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
500 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
501
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
502 patientMerge = merge(patient1, patient2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
503 patientMerge = merge(patientMerge, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
504 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
505 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
506 patientMerge12 = merge(patient1, patient2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
507 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
508 patientMerge13 = merge(patient1, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
509 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
510 patientMerge23 = merge(patient2, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
511 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
512
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
513
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
514 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
515 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
516 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
517 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
518
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
519 res1 = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
520 res2 = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
521 res3 = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
522 res12 = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
523 res13 = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
524 res23 = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
525 resAll = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
526 read1Count = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
527 read2Count = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
528 read3Count = vector()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
529
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
530 if(appendTriplets){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
531 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
532 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
533 for(iter in 1:length(product[,1])){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
534 threshhold = product[iter,threshholdIndex]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
535 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
536 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
537 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
538 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
539
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
540 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
541 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
542 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
543
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
544 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
545 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
546 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
547
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
548 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
549 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
550 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
551 res1 = append(res1, sum(one))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
552 res2 = append(res2, sum(two))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
553 res3 = append(res3, sum(three))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
554 resAll = append(resAll, sum(all))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
555 res12 = append(res12, sum(one_two))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
556 res13 = append(res13, sum(one_three))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
557 res23 = append(res23, sum(two_three))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
558 #threshhold = 0
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
559 if(threshhold != 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
560 if(sum(one) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
561 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
562 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
563 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
564 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
565 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
566 if(sum(two) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
567 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
568 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
569 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
570 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
571 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
572 if(sum(three) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
573 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
574 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
575 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
576 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
577 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
578 if(sum(one_two) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
579 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
580 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
581 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
582 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
583 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
584 if(sum(one_three) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
585 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
586 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
587 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
588 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
589 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
590 if(sum(two_three) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
591 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
592 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
593 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
594 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
595 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
596 } else { #scatterplot data
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
597 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
598 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
599 if(sum(in_two) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
600 scatterplot_locus_data[in_two,]$type = "In two"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
601 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
602 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
603 if(sum(in_three)> 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
604 scatterplot_locus_data[in_three,]$type = "In three"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
605 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
606 not_in_one = scatterplot_locus_data$type != "In one"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
607 if(sum(not_in_one) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
608 scatterplot_locus_data[not_in_one,]$type = "In multiple"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
609 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
610 p = NULL
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
611 if(nrow(scatterplot_locus_data) != 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
612 if(on == "normalized_read_count"){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
613 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
614 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
615 } else {
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
616 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
617 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
618 p = p + geom_point(aes(colour=type), position="jitter")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
619 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
620 } else {
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
621 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
622 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
623 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
624 print(p)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
625 dev.off()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
626 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
627 if(sum(all) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
628 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
629 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
630 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
631 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
632 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
633 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
634 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
635 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
636 colnames(patientResult)[6] = oneSample
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
637 colnames(patientResult)[7] = twoSample
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
638 colnames(patientResult)[8] = threeSample
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
639 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
640 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
641 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
642
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
643 colnamesBak = colnames(patientResult)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
644 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
645 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
646 colnames(patientResult) = colnamesBak
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
647
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
648 patientResult$Locus = factor(patientResult$Locus, Titles)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
649 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
650
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
651 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
652 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
653 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
654 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
655 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
656 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
657 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
658 print(plt)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
659 dev.off()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
660
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
661 fontSize = 4
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
662
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
663 bak = patientResult
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
664 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
665 patientResult$relativeValue = patientResult$value * 10
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
666 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
667 plt = ggplot(patientResult)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
668 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
669 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
670 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
671 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
672 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
673 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
674 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
675 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
676 print(plt)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
677 dev.off()
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
678 }
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
679
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
680 if(nrow(triplets) != 0){
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
681 triplets$uniqueID = "ID"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
682
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
683 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
684 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
685 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
686
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
687 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
688 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
689 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
690
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
691 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
692
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
693 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
694 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
695 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
696
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
697 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
698 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
699
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
700 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
701
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
702 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
703
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
704 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
705
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
706 triplets = triplets[triplets$normalized_read_count >= min_cells,]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
707
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
708 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
709
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
710 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
711
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
712 #remove duplicate V+J+CDR3, add together numerical values
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
713 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
714 Cell_Count=unique(.SD$Cell_Count),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
715 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
716 Total_Read_Count=sum(.SD$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
717 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
718 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
719 Frequency=sum(.SD$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
720 normalized_read_count=sum(.SD$normalized_read_count),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
721 Log10_Frequency=sum(.SD$Log10_Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
722 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
723
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
724
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
725 interval = intervalReads
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
726 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
727 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
728
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
729 one = triplets[triplets$Sample == "14696_reg_BM",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
730 two = triplets[triplets$Sample == "24536_reg_BM",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
731 three = triplets[triplets$Sample == "24062_reg_BM",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
732 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
733
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
734 one = triplets[triplets$Sample == "16278_Left",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
735 two = triplets[triplets$Sample == "26402_Left",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
736 three = triplets[triplets$Sample == "26759_Left",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
737 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
738
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
739 one = triplets[triplets$Sample == "16278_Right",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
740 two = triplets[triplets$Sample == "26402_Right",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
741 three = triplets[triplets$Sample == "26759_Right",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
742 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
743
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
744
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
745 interval = intervalFreq
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
746 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
747 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
748
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
749 one = triplets[triplets$Sample == "14696_reg_BM",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
750 two = triplets[triplets$Sample == "24536_reg_BM",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
751 three = triplets[triplets$Sample == "24062_reg_BM",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
752 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
753
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
754 one = triplets[triplets$Sample == "16278_Left",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
755 two = triplets[triplets$Sample == "26402_Left",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
756 three = triplets[triplets$Sample == "26759_Left",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
757 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
758
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
759 one = triplets[triplets$Sample == "16278_Right",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
760 two = triplets[triplets$Sample == "26402_Right",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
761 three = triplets[triplets$Sample == "26759_Right",]
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
762 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F)
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
763 } else {
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
764 cat("", file="triplets.txt")
28c3695259c1 Uploaded
davidvanzessen
parents:
diff changeset
765 }