0
|
1 args <- commandArgs(trailingOnly = TRUE)
|
|
2
|
|
3 inFile = args[1]
|
|
4 outDir = args[2]
|
3
|
5 logfile = args[3]
|
|
6 min_freq = as.numeric(args[4])
|
|
7 min_cells = as.numeric(args[5])
|
|
8
|
|
9 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
|
0
|
10
|
4
|
11 library(ggplot2)
|
|
12 library(reshape2)
|
|
13 library(data.table)
|
|
14 library(grid)
|
|
15 library(parallel)
|
0
|
16 #require(xtable)
|
3
|
17 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
|
13
|
18 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
|
9
|
19 dat = dat[!is.na(dat$Patient),]
|
13
|
20 dat$Related_to_leukemia_clone = F
|
9
|
21
|
0
|
22 setwd(outDir)
|
3
|
23 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
|
2
|
24 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
|
|
25 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
|
|
26
|
3
|
27 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
|
12
|
28
|
13
|
29 dat$Frequency = ((10^dat$Log10_Frequency)*100)
|
2
|
30
|
3
|
31 dat = dat[dat$Frequency >= min_freq,]
|
|
32
|
13
|
33 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
|
|
34
|
|
35 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
|
|
36
|
|
37 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
|
|
38 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
|
|
39 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
|
|
40
|
|
41 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
|
|
42 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
|
|
43
|
|
44 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
|
|
45
|
|
46 dat = merge(dat, min_cell_count, by="min_cell_paste")
|
|
47
|
|
48 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
|
|
49
|
3
|
50 dat = dat[dat$normalized_read_count >= min_cells,]
|
13
|
51
|
|
52 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
|
9
|
53
|
22
|
54 patients = split(dat, dat$Patient, drop=T)
|
9
|
55 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
|
6
|
56 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
|
0
|
57 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
|
|
58 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
|
|
59 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
|
|
60 Titles = factor(Titles, levels=Titles)
|
|
61 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
|
|
62
|
|
63 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
|
|
64 x$Sample = factor(x$Sample, levels=unique(x$Sample))
|
|
65 onShort = "reads"
|
|
66 if(on == "Frequency"){
|
|
67 onShort = "freq"
|
|
68 }
|
18
|
69 onx = paste(on, ".x", sep="")
|
|
70 ony = paste(on, ".y", sep="")
|
0
|
71 splt = split(x, x$Sample, drop=T)
|
4
|
72 type="pair"
|
2
|
73 if(length(splt) == 1){
|
3
|
74 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
|
4
|
75 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
|
|
76 type="single"
|
2
|
77 }
|
0
|
78 patient1 = splt[[1]]
|
|
79 patient2 = splt[[2]]
|
|
80
|
|
81 threshholdIndex = which(colnames(product) == "interval")
|
|
82 V_SegmentIndex = which(colnames(product) == "V_Segments")
|
|
83 J_SegmentIndex = which(colnames(product) == "J_Segments")
|
|
84 titleIndex = which(colnames(product) == "Titles")
|
|
85 sampleIndex = which(colnames(x) == "Sample")
|
|
86 patientIndex = which(colnames(x) == "Patient")
|
|
87 oneSample = paste(patient1[1,sampleIndex], sep="")
|
|
88 twoSample = paste(patient2[1,sampleIndex], sep="")
|
|
89 patient = paste(x[1,patientIndex])
|
3
|
90
|
0
|
91 switched = F
|
|
92 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
|
|
93 tmp = twoSample
|
|
94 twoSample = oneSample
|
|
95 oneSample = tmp
|
|
96 tmp = patient1
|
|
97 patient1 = patient2
|
|
98 patient2 = tmp
|
|
99 switched = T
|
|
100 }
|
|
101 if(appendtxt){
|
4
|
102 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
|
0
|
103 }
|
3
|
104 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
|
9
|
105
|
12
|
106 #patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
|
|
107 #patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
|
13
|
108 patient1$merge = paste(patient1$Clone_Sequence)
|
|
109 patient2$merge = paste(patient2$Clone_Sequence)
|
9
|
110
|
12
|
111 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
|
9
|
112 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
|
18
|
113 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
|
0
|
114 res1 = vector()
|
|
115 res2 = vector()
|
|
116 resBoth = vector()
|
|
117 read1Count = vector()
|
|
118 read2Count = vector()
|
2
|
119 locussum1 = vector()
|
|
120 locussum2 = vector()
|
9
|
121
|
|
122 print(patient)
|
0
|
123 #for(iter in 1){
|
|
124 for(iter in 1:length(product[,1])){
|
|
125 threshhold = product[iter,threshholdIndex]
|
|
126 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
|
|
127 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
|
18
|
128 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
|
|
129 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both higher than threshold
|
19
|
130 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$merge))
|
|
131 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$merge))
|
14
|
132 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
|
|
133 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
|
0
|
134 res1 = append(res1, sum(one))
|
2
|
135 res2 = append(res2, sum(two))
|
0
|
136 resBoth = append(resBoth, sum(both))
|
2
|
137 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
|
|
138 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
|
0
|
139 #threshhold = 0
|
|
140 if(threshhold != 0){
|
|
141 if(sum(one) > 0){
|
15
|
142 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
143 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
|
0
|
144 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
145 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
146 }
|
|
147 if(sum(two) > 0){
|
15
|
148 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
149 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
|
0
|
150 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
151 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
152 }
|
|
153 }
|
|
154 if(sum(both) > 0){
|
15
|
155 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
156 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
|
0
|
157 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
158 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
159 }
|
|
160 }
|
2
|
161 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
|
0
|
162 if(sum(is.na(patientResult$percentage)) > 0){
|
|
163 patientResult[is.na(patientResult$percentage),]$percentage = 0
|
|
164 }
|
|
165 colnames(patientResult)[6] = oneSample
|
|
166 colnames(patientResult)[8] = twoSample
|
|
167 colnamesBak = colnames(patientResult)
|
2
|
168 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
|
0
|
169 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
170 colnames(patientResult) = colnamesBak
|
|
171
|
|
172 patientResult$Locus = factor(patientResult$Locus, Titles)
|
|
173 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
|
|
174
|
|
175 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
|
|
176 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
|
|
177 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
178 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
|
|
179 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
|
|
180 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
181 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
|
|
182 print(plt)
|
|
183 dev.off()
|
|
184 #(t,r,b,l)
|
|
185 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
|
|
186 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
|
|
187 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
188 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
|
|
189 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
|
|
190 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
191 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
|
|
192 print(plt)
|
|
193 dev.off()
|
|
194
|
|
195 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
|
|
196 patientResult$relativeValue = patientResult$value * 10
|
|
197 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
|
|
198 plt = ggplot(patientResult)
|
|
199 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
|
|
200 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
201 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
|
|
202 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
|
|
203 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
|
|
204 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
|
|
205 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
|
|
206 print(plt)
|
|
207 dev.off()
|
|
208 }
|
|
209
|
3
|
210 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
|
|
211
|
0
|
212 interval = intervalFreq
|
|
213 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
4
|
214 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
215 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
|
0
|
216
|
3
|
217 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
|
|
218
|
0
|
219 interval = intervalReads
|
|
220 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
4
|
221 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
9
|
222 mclapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
|
0
|
223
|
3
|
224 cat("</table></html>", file=logfile, append=T)
|
|
225
|
7
|
226
|
13
|
227
|
7
|
228 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
|
|
229 onShort = "reads"
|
|
230 if(on == "Frequency"){
|
|
231 onShort = "freq"
|
|
232 }
|
18
|
233 onx = paste(on, ".x", sep="")
|
|
234 ony = paste(on, ".y", sep="")
|
|
235 onz = paste(on, ".z", sep="")
|
7
|
236 type="triplet"
|
|
237
|
|
238 threshholdIndex = which(colnames(product) == "interval")
|
|
239 V_SegmentIndex = which(colnames(product) == "V_Segments")
|
|
240 J_SegmentIndex = which(colnames(product) == "J_Segments")
|
|
241 titleIndex = which(colnames(product) == "Titles")
|
|
242 sampleIndex = which(colnames(patient1) == "Sample")
|
|
243 patientIndex = which(colnames(patient1) == "Patient")
|
|
244 oneSample = paste(patient1[1,sampleIndex], sep="")
|
|
245 twoSample = paste(patient2[1,sampleIndex], sep="")
|
|
246 threeSample = paste(patient3[1,sampleIndex], sep="")
|
|
247
|
12
|
248 #patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
|
|
249 #patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
|
|
250 #patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
|
|
251
|
15
|
252 patient1$merge = paste(patient1$Clone_Sequence)
|
|
253 patient2$merge = paste(patient2$Clone_Sequence)
|
|
254 patient3$merge = paste(patient3$Clone_Sequence)
|
9
|
255
|
|
256 patientMerge = merge(patient1, patient2, by="merge")
|
|
257 patientMerge = merge(patientMerge, patient3, by="merge")
|
13
|
258 colnames(patientMerge)[30:length(colnames(patientMerge))] = paste(colnames(patientMerge)[30:length(colnames(patientMerge))], ".z", sep="")
|
18
|
259 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
|
20
|
260
|
|
261 patientMerge12 = merge(patient1, patient2, by="merge")
|
|
262 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
|
|
263 patientMerge13 = merge(patient1, patient3, by="merge")
|
|
264 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
|
|
265 patientMerge23 = merge(patient2, patient3, by="merge")
|
|
266 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
|
|
267
|
|
268 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count")
|
|
269 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
|
|
270 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$Clone_Sequence),]
|
|
271 scatterplot_data$type = factor(x="single", levels=c("In one", "In two", "In three"))
|
|
272
|
7
|
273 res1 = vector()
|
|
274 res2 = vector()
|
|
275 res3 = vector()
|
20
|
276 res12 = vector()
|
|
277 res13 = vector()
|
|
278 res23 = vector()
|
7
|
279 resAll = vector()
|
|
280 read1Count = vector()
|
|
281 read2Count = vector()
|
|
282 read3Count = vector()
|
|
283
|
|
284 if(appendTriplets){
|
9
|
285 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
|
7
|
286 }
|
|
287 for(iter in 1:length(product[,1])){
|
|
288 threshhold = product[iter,threshholdIndex]
|
|
289 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
|
|
290 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
|
18
|
291 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
|
|
292 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
|
19
|
293 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$merge))
|
|
294 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$merge))
|
|
295 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$merge))
|
7
|
296
|
20
|
297 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold)
|
|
298 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold)
|
|
299 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold)
|
|
300
|
18
|
301 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
|
|
302 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
|
|
303 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
|
7
|
304 res1 = append(res1, sum(one))
|
|
305 res2 = append(res2, sum(two))
|
|
306 res3 = append(res3, sum(three))
|
|
307 resAll = append(resAll, sum(all))
|
20
|
308 res12 = append(res12, sum(one_two))
|
|
309 res13 = append(res13, sum(one_three))
|
|
310 res23 = append(res23, sum(two_three))
|
7
|
311 #threshhold = 0
|
|
312 if(threshhold != 0){
|
|
313 if(sum(one) > 0){
|
20
|
314 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
315 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
316 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
317 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
318 }
|
|
319 if(sum(two) > 0){
|
20
|
320 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
321 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
322 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
323 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
324 }
|
|
325 if(sum(three) > 0){
|
20
|
326 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
327 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
328 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
329 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
330 }
|
20
|
331 if(sum(one_two) > 0){
|
|
332 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
333 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
|
|
334 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
335 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
336 }
|
|
337 if(sum(one_three) > 0){
|
|
338 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
339 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
|
340 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
341 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
342 }
|
|
343 if(sum(two_three) > 0){
|
|
344 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
345 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
|
346 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
347 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
348 }
|
|
349 } else { #scatterplot data
|
|
350 scatterplot_locus_data = scatterplot_data
|
|
351 scatterplot_locus_data$type = ifelse(scatterplot_locus_data$Clone_Sequence %in% patientMerge12[one_two,]$Clone_Sequence.x, "In two", "In one")
|
|
352 scatterplot_locus_data$type = ifelse(scatterplot_locus_data$Clone_Sequence %in% patientMerge13[one_three,]$Clone_Sequence.x, "In two", "In one")
|
|
353 scatterplot_locus_data$type = ifelse(scatterplot_locus_data$Clone_Sequence %in% patientMerge23[two_three,]$Clone_Sequence.x, "In two", "In one")
|
|
354 scatterplot_locus_data$type = ifelse(scatterplot_locus_data$type == "In two", ifelse(scatterplot_locus_data$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x, "In three", "In two"), "In one")
|
|
355 scatterplot_locus_data$type = ifelse(scatterplot_locus_data$type == "In one", "In one", "In multiple")
|
|
356
|
|
357 p = NULL
|
|
358 if(nrow(scatterplot_locus_data) != 0){
|
|
359 if(on == "normalized_read_count"){
|
|
360 scales = 10^(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
|
361 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales)
|
|
362 } else {
|
|
363 p = ggplot(scatterplot_locus_data, aes(type, Frequency))
|
|
364 }
|
|
365 p = p + geom_point(aes(colour=type), position="jitter")
|
|
366 p = p + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
|
|
367 } else {
|
|
368 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
|
|
369 }
|
|
370 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
|
|
371 print(p)
|
|
372 dev.off()
|
|
373 }
|
7
|
374 if(sum(all) > 0){
|
20
|
375 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
|
|
376 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
7
|
377 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
378 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
379 }
|
|
380 }
|
20
|
381 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
|
|
382 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
|
7
|
383 colnames(patientResult)[6] = oneSample
|
20
|
384 colnames(patientResult)[7] = twoSample
|
|
385 colnames(patientResult)[8] = threeSample
|
|
386 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
|
|
387 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
|
|
388 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
|
7
|
389
|
|
390 colnamesBak = colnames(patientResult)
|
20
|
391 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
|
7
|
392 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
393 colnames(patientResult) = colnamesBak
|
|
394
|
|
395 patientResult$Locus = factor(patientResult$Locus, Titles)
|
|
396 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
|
|
397
|
|
398 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
|
|
399 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
|
|
400 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
401 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
|
|
402 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
|
|
403 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
404 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
|
|
405 print(plt)
|
|
406 dev.off()
|
|
407
|
|
408 fontSize = 4
|
|
409
|
|
410 bak = patientResult
|
|
411 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
|
|
412 patientResult$relativeValue = patientResult$value * 10
|
|
413 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
|
|
414 plt = ggplot(patientResult)
|
|
415 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
|
|
416 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
417 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
|
|
418 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
|
|
419 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
|
|
420 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
|
|
421 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
|
|
422 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
|
|
423 print(plt)
|
|
424 dev.off()
|
|
425 }
|
|
426
|
9
|
427 triplets$uniqueID = "ID"
|
|
428
|
|
429 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
430 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
431 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
432
|
|
433 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
434 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
435 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
436
|
|
437 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
|
|
438
|
13
|
439 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
|
|
440 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
|
|
441 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
|
|
442
|
|
443 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
|
|
444 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
|
|
445
|
|
446 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
|
9
|
447
|
13
|
448 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
|
|
449
|
|
450 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
|
|
451
|
|
452 triplets = triplets[triplets$normalized_read_count >= min_cells,]
|
|
453
|
|
454 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
|
|
455
|
|
456 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
|
9
|
457
|
7
|
458 interval = intervalReads
|
|
459 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
|
460 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
461
|
9
|
462 one = triplets[triplets$Sample == "14696_reg_BM",]
|
|
463 two = triplets[triplets$Sample == "24536_reg_BM",]
|
|
464 three = triplets[triplets$Sample == "24062_reg_BM",]
|
8
|
465 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
|
7
|
466
|
9
|
467 one = triplets[triplets$Sample == "16278_Left",]
|
|
468 two = triplets[triplets$Sample == "26402_Left",]
|
|
469 three = triplets[triplets$Sample == "26759_Left",]
|
8
|
470 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
|
7
|
471
|
9
|
472 one = triplets[triplets$Sample == "16278_Right",]
|
|
473 two = triplets[triplets$Sample == "26402_Right",]
|
|
474 three = triplets[triplets$Sample == "26759_Right",]
|
8
|
475 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
|
7
|
476
|
|
477
|
|
478 interval = intervalFreq
|
|
479 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
|
480 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
481
|
9
|
482 one = triplets[triplets$Sample == "14696_reg_BM",]
|
|
483 two = triplets[triplets$Sample == "24536_reg_BM",]
|
|
484 three = triplets[triplets$Sample == "24062_reg_BM",]
|
8
|
485 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
|
7
|
486
|
9
|
487 one = triplets[triplets$Sample == "16278_Left",]
|
|
488 two = triplets[triplets$Sample == "26402_Left",]
|
|
489 three = triplets[triplets$Sample == "26759_Left",]
|
8
|
490 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
|
7
|
491
|
9
|
492 one = triplets[triplets$Sample == "16278_Right",]
|
|
493 two = triplets[triplets$Sample == "26402_Right",]
|
|
494 three = triplets[triplets$Sample == "26759_Right",]
|
8
|
495 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)
|