0
|
1 args <- commandArgs(trailingOnly = TRUE)
|
|
2
|
|
3 inFile = args[1]
|
|
4 outDir = args[2]
|
3
|
5 logfile = args[3]
|
|
6 min_freq = as.numeric(args[4])
|
|
7 min_cells = as.numeric(args[5])
|
29
|
8 mergeOn = args[6]
|
3
|
9
|
|
10 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
|
0
|
11
|
4
|
12 library(ggplot2)
|
|
13 library(reshape2)
|
|
14 library(data.table)
|
|
15 library(grid)
|
|
16 library(parallel)
|
0
|
17 #require(xtable)
|
3
|
18 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
|
13
|
19 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
|
9
|
20 dat = dat[!is.na(dat$Patient),]
|
13
|
21 dat$Related_to_leukemia_clone = F
|
9
|
22
|
0
|
23 setwd(outDir)
|
3
|
24 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
|
2
|
25 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
|
|
26 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
|
|
27
|
3
|
28 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
|
12
|
29
|
13
|
30 dat$Frequency = ((10^dat$Log10_Frequency)*100)
|
2
|
31
|
3
|
32 dat = dat[dat$Frequency >= min_freq,]
|
|
33
|
13
|
34 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
|
|
35
|
|
36 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
|
|
37
|
|
38 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
|
|
39 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
|
|
40 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
|
|
41
|
|
42 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
|
|
43 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
|
|
44
|
|
45 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
|
|
46
|
|
47 dat = merge(dat, min_cell_count, by="min_cell_paste")
|
|
48
|
|
49 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
|
|
50
|
3
|
51 dat = dat[dat$normalized_read_count >= min_cells,]
|
13
|
52
|
|
53 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
|
9
|
54
|
29
|
55 cat("<tr><td>Adding duplicate V+J+CDR3 sequences</td></tr>", file=logfile, append=T)
|
|
56 #remove duplicate V+J+CDR3, add together numerical values
|
|
57 dat= data.frame(data.table(dat)[, list(Receptor=unique(.SD$Receptor),
|
|
58 Cell_Count=unique(.SD$Cell_Count),
|
|
59 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
|
|
60 Total_Read_Count=sum(.SD$Total_Read_Count),
|
|
61 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
|
|
62 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
|
|
63 Frequency=sum(.SD$Frequency),
|
|
64 locus_V=unique(.SD$locus_V),
|
|
65 locus_J=unique(.SD$locus_J),
|
|
66 min_cell_count=unique(.SD$min_cell_count),
|
|
67 normalized_read_count=sum(.SD$normalized_read_count),
|
|
68 Log10_Frequency=sum(.SD$Log10_Frequency),
|
|
69 Clone_Sequence=.SD$Clone_Sequence[1],
|
|
70 min_cell_paste=.SD$min_cell_paste[1],
|
|
71 paste=unique(.SD$paste)), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
|
|
72
|
|
73
|
22
|
74 patients = split(dat, dat$Patient, drop=T)
|
9
|
75 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
|
6
|
76 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
|
0
|
77 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
|
|
78 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
|
|
79 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
|
|
80 Titles = factor(Titles, levels=Titles)
|
|
81 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
|
|
82
|
29
|
83 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
|
|
84
|
0
|
85 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
|
28
|
86 if (!is.data.frame(x) & is.list(x)){
|
|
87 x = x[[1]]
|
|
88 }
|
0
|
89 x$Sample = factor(x$Sample, levels=unique(x$Sample))
|
|
90 onShort = "reads"
|
|
91 if(on == "Frequency"){
|
|
92 onShort = "freq"
|
|
93 }
|
18
|
94 onx = paste(on, ".x", sep="")
|
|
95 ony = paste(on, ".y", sep="")
|
0
|
96 splt = split(x, x$Sample, drop=T)
|
4
|
97 type="pair"
|
2
|
98 if(length(splt) == 1){
|
3
|
99 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
|
4
|
100 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
|
|
101 type="single"
|
2
|
102 }
|
0
|
103 patient1 = splt[[1]]
|
|
104 patient2 = splt[[2]]
|
|
105
|
|
106 threshholdIndex = which(colnames(product) == "interval")
|
|
107 V_SegmentIndex = which(colnames(product) == "V_Segments")
|
|
108 J_SegmentIndex = which(colnames(product) == "J_Segments")
|
|
109 titleIndex = which(colnames(product) == "Titles")
|
|
110 sampleIndex = which(colnames(x) == "Sample")
|
|
111 patientIndex = which(colnames(x) == "Patient")
|
|
112 oneSample = paste(patient1[1,sampleIndex], sep="")
|
|
113 twoSample = paste(patient2[1,sampleIndex], sep="")
|
|
114 patient = paste(x[1,patientIndex])
|
3
|
115
|
0
|
116 switched = F
|
|
117 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
|
|
118 tmp = twoSample
|
|
119 twoSample = oneSample
|
|
120 oneSample = tmp
|
|
121 tmp = patient1
|
|
122 patient1 = patient2
|
|
123 patient2 = tmp
|
|
124 switched = T
|
|
125 }
|
|
126 if(appendtxt){
|
4
|
127 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
|
0
|
128 }
|
3
|
129 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
|
9
|
130
|
29
|
131 if(mergeOn == "Clone_Sequence"){
|
|
132 patient1$merge = paste(patient1$Clone_Sequence)
|
|
133 patient2$merge = paste(patient2$Clone_Sequence)
|
|
134 } else {
|
|
135 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
|
|
136 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
|
|
137 }
|
9
|
138
|
30
|
139 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
|
29
|
140 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
|
30
|
141 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
|
|
142 scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
|
29
|
143 scatterplot_data$on = onShort
|
|
144
|
9
|
145 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
|
18
|
146 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
|
0
|
147 res1 = vector()
|
|
148 res2 = vector()
|
|
149 resBoth = vector()
|
|
150 read1Count = vector()
|
|
151 read2Count = vector()
|
2
|
152 locussum1 = vector()
|
|
153 locussum2 = vector()
|
9
|
154
|
0
|
155 #for(iter in 1){
|
|
156 for(iter in 1:length(product[,1])){
|
|
157 threshhold = product[iter,threshholdIndex]
|
|
158 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
|
|
159 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
|
18
|
160 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
|
29
|
161 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
|
30
|
162 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
|
|
163 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
|
14
|
164 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
|
|
165 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
|
0
|
166 res1 = append(res1, sum(one))
|
2
|
167 res2 = append(res2, sum(two))
|
0
|
168 resBoth = append(resBoth, sum(both))
|
2
|
169 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
|
|
170 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
|
0
|
171 #threshhold = 0
|
|
172 if(threshhold != 0){
|
|
173 if(sum(one) > 0){
|
15
|
174 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
175 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
|
0
|
176 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
177 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
178 }
|
|
179 if(sum(two) > 0){
|
15
|
180 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
181 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
|
0
|
182 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
183 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
184 }
|
29
|
185 } else {
|
|
186 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
|
|
187 if(nrow(scatterplot_locus_data) > 0){
|
|
188 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
|
|
189 }
|
30
|
190 in_both = (scatterplot_locus_data$merge %in% patientMerge[both,]$merge)
|
|
191 if(any(in_both)){
|
|
192 scatterplot_locus_data[in_both,]$type = "In Both"
|
|
193 }
|
|
194 in_one = (scatterplot_locus_data$merge %in% patient1$merge)
|
|
195 not_in_one = !in_one
|
|
196 if(any(not_in_one)){
|
|
197 scatterplot_locus_data[not_in_one,]$type = twoSample
|
29
|
198 }
|
|
199 if(type == "single"){
|
|
200 single_patients <<- rbind(single_patients, scatterplot_locus_data)
|
|
201 }
|
|
202 p = NULL
|
|
203 if(nrow(scatterplot_locus_data) != 0){
|
|
204 if(on == "normalized_read_count"){
|
30
|
205 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
|
206 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
|
29
|
207 } else {
|
30
|
208 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
|
29
|
209 }
|
|
210 p = p + geom_point(aes(colour=type), position="jitter")
|
|
211 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
|
|
212 } else {
|
|
213 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
|
|
214 }
|
|
215 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
|
|
216 print(p)
|
|
217 dev.off()
|
0
|
218 }
|
|
219 if(sum(both) > 0){
|
15
|
220 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
221 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
|
0
|
222 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
223 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
29
|
224 }
|
0
|
225 }
|
2
|
226 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
|
0
|
227 if(sum(is.na(patientResult$percentage)) > 0){
|
|
228 patientResult[is.na(patientResult$percentage),]$percentage = 0
|
|
229 }
|
|
230 colnames(patientResult)[6] = oneSample
|
|
231 colnames(patientResult)[8] = twoSample
|
|
232 colnamesBak = colnames(patientResult)
|
2
|
233 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
|
0
|
234 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
235 colnames(patientResult) = colnamesBak
|
|
236
|
|
237 patientResult$Locus = factor(patientResult$Locus, Titles)
|
|
238 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
|
|
239
|
|
240 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
|
|
241 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
|
|
242 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
243 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
|
|
244 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
|
|
245 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
246 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
|
|
247 print(plt)
|
|
248 dev.off()
|
|
249 #(t,r,b,l)
|
|
250 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
|
|
251 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
|
|
252 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
253 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
|
|
254 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
|
|
255 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
256 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
|
|
257 print(plt)
|
|
258 dev.off()
|
|
259
|
|
260 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
|
|
261 patientResult$relativeValue = patientResult$value * 10
|
|
262 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
|
|
263 plt = ggplot(patientResult)
|
|
264 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
|
|
265 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
266 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
|
|
267 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
|
|
268 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
|
|
269 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
|
|
270 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
|
|
271 print(plt)
|
|
272 dev.off()
|
|
273 }
|
|
274
|
3
|
275 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
|
|
276
|
0
|
277 interval = intervalFreq
|
|
278 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
4
|
279 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
29
|
280 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
|
0
|
281
|
3
|
282 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
|
|
283
|
0
|
284 interval = intervalReads
|
|
285 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
4
|
286 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
29
|
287 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
|
0
|
288
|
3
|
289 cat("</table></html>", file=logfile, append=T)
|
|
290
|
31
|
291 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
29
|
292 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales)
|
|
293 p = p + geom_point(aes(colour=type), position="jitter")
|
|
294 p = p + xlab("In one or both samples") + ylab("Reads")
|
|
295 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
|
|
296 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080)
|
|
297 print(p)
|
|
298 dev.off()
|
7
|
299
|
29
|
300 p = ggplot(single_patients, aes(Rearrangement, Frequency))
|
|
301 p = p + geom_point(aes(colour=type), position="jitter")
|
|
302 p = p + xlab("In one or both samples") + ylab("Frequency")
|
|
303 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
|
|
304 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080)
|
|
305 print(p)
|
|
306 dev.off()
|
13
|
307
|
7
|
308 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
|
|
309 onShort = "reads"
|
|
310 if(on == "Frequency"){
|
|
311 onShort = "freq"
|
|
312 }
|
18
|
313 onx = paste(on, ".x", sep="")
|
|
314 ony = paste(on, ".y", sep="")
|
|
315 onz = paste(on, ".z", sep="")
|
7
|
316 type="triplet"
|
|
317
|
|
318 threshholdIndex = which(colnames(product) == "interval")
|
|
319 V_SegmentIndex = which(colnames(product) == "V_Segments")
|
|
320 J_SegmentIndex = which(colnames(product) == "J_Segments")
|
|
321 titleIndex = which(colnames(product) == "Titles")
|
|
322 sampleIndex = which(colnames(patient1) == "Sample")
|
|
323 patientIndex = which(colnames(patient1) == "Patient")
|
|
324 oneSample = paste(patient1[1,sampleIndex], sep="")
|
|
325 twoSample = paste(patient2[1,sampleIndex], sep="")
|
|
326 threeSample = paste(patient3[1,sampleIndex], sep="")
|
|
327
|
29
|
328 if(mergeOn == "Clone_Sequence"){
|
|
329 patient1$merge = paste(patient1$Clone_Sequence)
|
|
330 patient2$merge = paste(patient2$Clone_Sequence)
|
|
331 patient3$merge = paste(patient3$Clone_Sequence)
|
|
332
|
|
333 } else {
|
|
334 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
|
|
335 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
|
|
336 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
|
|
337 }
|
9
|
338
|
|
339 patientMerge = merge(patient1, patient2, by="merge")
|
|
340 patientMerge = merge(patientMerge, patient3, by="merge")
|
28
|
341 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
|
18
|
342 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
|
20
|
343 patientMerge12 = merge(patient1, patient2, by="merge")
|
|
344 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
|
|
345 patientMerge13 = merge(patient1, patient3, by="merge")
|
|
346 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
|
|
347 patientMerge23 = merge(patient2, patient3, by="merge")
|
|
348 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
|
|
349
|
28
|
350
|
30
|
351 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
|
20
|
352 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
|
30
|
353 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
|
27
|
354 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
|
20
|
355
|
7
|
356 res1 = vector()
|
|
357 res2 = vector()
|
|
358 res3 = vector()
|
20
|
359 res12 = vector()
|
|
360 res13 = vector()
|
|
361 res23 = vector()
|
7
|
362 resAll = vector()
|
|
363 read1Count = vector()
|
|
364 read2Count = vector()
|
|
365 read3Count = vector()
|
|
366
|
|
367 if(appendTriplets){
|
9
|
368 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
|
7
|
369 }
|
|
370 for(iter in 1:length(product[,1])){
|
|
371 threshhold = product[iter,threshholdIndex]
|
|
372 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
|
|
373 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
|
18
|
374 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
|
|
375 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
|
7
|
376
|
30
|
377 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
|
|
378 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
|
|
379 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
|
24
|
380
|
30
|
381 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
|
|
382 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
|
|
383 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
|
20
|
384
|
18
|
385 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
|
|
386 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
|
|
387 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
|
7
|
388 res1 = append(res1, sum(one))
|
|
389 res2 = append(res2, sum(two))
|
|
390 res3 = append(res3, sum(three))
|
|
391 resAll = append(resAll, sum(all))
|
20
|
392 res12 = append(res12, sum(one_two))
|
|
393 res13 = append(res13, sum(one_three))
|
|
394 res23 = append(res23, sum(two_three))
|
7
|
395 #threshhold = 0
|
|
396 if(threshhold != 0){
|
|
397 if(sum(one) > 0){
|
20
|
398 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
399 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
400 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
401 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
402 }
|
|
403 if(sum(two) > 0){
|
20
|
404 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
405 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
406 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
407 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
408 }
|
|
409 if(sum(three) > 0){
|
20
|
410 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
411 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
412 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
413 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
414 }
|
20
|
415 if(sum(one_two) > 0){
|
|
416 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
417 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
|
|
418 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
419 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
420 }
|
|
421 if(sum(one_three) > 0){
|
|
422 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
423 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
|
424 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
425 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
426 }
|
|
427 if(sum(two_three) > 0){
|
|
428 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
429 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
|
430 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
431 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
432 }
|
|
433 } else { #scatterplot data
|
24
|
434 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
|
30
|
435 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
|
27
|
436 if(sum(in_two) > 0){
|
|
437 scatterplot_locus_data[in_two,]$type = "In two"
|
|
438 }
|
30
|
439 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
|
27
|
440 if(sum(in_three)> 0){
|
|
441 scatterplot_locus_data[in_three,]$type = "In three"
|
|
442 }
|
|
443 not_in_one = scatterplot_locus_data$type != "In one"
|
|
444 if(sum(not_in_one) > 0){
|
|
445 scatterplot_locus_data[not_in_one,]$type = "In multiple"
|
|
446 }
|
20
|
447 p = NULL
|
|
448 if(nrow(scatterplot_locus_data) != 0){
|
|
449 if(on == "normalized_read_count"){
|
31
|
450 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
20
|
451 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales)
|
|
452 } else {
|
|
453 p = ggplot(scatterplot_locus_data, aes(type, Frequency))
|
|
454 }
|
|
455 p = p + geom_point(aes(colour=type), position="jitter")
|
25
|
456 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
|
20
|
457 } else {
|
|
458 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
|
|
459 }
|
|
460 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
|
|
461 print(p)
|
|
462 dev.off()
|
|
463 }
|
7
|
464 if(sum(all) > 0){
|
20
|
465 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
|
|
466 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
7
|
467 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
468 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
469 }
|
|
470 }
|
20
|
471 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
|
|
472 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
|
7
|
473 colnames(patientResult)[6] = oneSample
|
20
|
474 colnames(patientResult)[7] = twoSample
|
|
475 colnames(patientResult)[8] = threeSample
|
|
476 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
|
|
477 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
|
|
478 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
|
7
|
479
|
|
480 colnamesBak = colnames(patientResult)
|
20
|
481 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
|
7
|
482 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
483 colnames(patientResult) = colnamesBak
|
|
484
|
|
485 patientResult$Locus = factor(patientResult$Locus, Titles)
|
|
486 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
|
|
487
|
|
488 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
|
|
489 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
|
|
490 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
491 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
|
|
492 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
|
|
493 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
494 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
|
|
495 print(plt)
|
|
496 dev.off()
|
|
497
|
|
498 fontSize = 4
|
|
499
|
|
500 bak = patientResult
|
|
501 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
|
|
502 patientResult$relativeValue = patientResult$value * 10
|
|
503 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
|
|
504 plt = ggplot(patientResult)
|
|
505 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
|
|
506 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
507 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
|
|
508 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
|
|
509 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
|
|
510 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
|
|
511 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
|
|
512 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
|
|
513 print(plt)
|
|
514 dev.off()
|
|
515 }
|
|
516
|
9
|
517 triplets$uniqueID = "ID"
|
|
518
|
|
519 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
520 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
521 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
522
|
|
523 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
524 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
525 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
526
|
|
527 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
|
|
528
|
13
|
529 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
|
|
530 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
|
|
531 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
|
|
532
|
|
533 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
|
|
534 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
|
|
535
|
|
536 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
|
9
|
537
|
13
|
538 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
|
|
539
|
|
540 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
|
|
541
|
|
542 triplets = triplets[triplets$normalized_read_count >= min_cells,]
|
|
543
|
|
544 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
|
|
545
|
|
546 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
|
9
|
547
|
29
|
548 #remove duplicate V+J+CDR3, add together numerical values
|
|
549 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor),
|
|
550 Cell_Count=unique(.SD$Cell_Count),
|
|
551 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
|
|
552 Total_Read_Count=sum(.SD$Total_Read_Count),
|
|
553 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
|
|
554 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
|
|
555 Frequency=sum(.SD$Frequency),
|
|
556 normalized_read_count=sum(.SD$normalized_read_count),
|
|
557 Log10_Frequency=sum(.SD$Log10_Frequency),
|
|
558 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
|
|
559
|
|
560
|
7
|
561 interval = intervalReads
|
|
562 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
|
563 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
564
|
9
|
565 one = triplets[triplets$Sample == "14696_reg_BM",]
|
|
566 two = triplets[triplets$Sample == "24536_reg_BM",]
|
|
567 three = triplets[triplets$Sample == "24062_reg_BM",]
|
8
|
568 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
|
7
|
569
|
9
|
570 one = triplets[triplets$Sample == "16278_Left",]
|
|
571 two = triplets[triplets$Sample == "26402_Left",]
|
|
572 three = triplets[triplets$Sample == "26759_Left",]
|
8
|
573 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
|
7
|
574
|
9
|
575 one = triplets[triplets$Sample == "16278_Right",]
|
|
576 two = triplets[triplets$Sample == "26402_Right",]
|
|
577 three = triplets[triplets$Sample == "26759_Right",]
|
8
|
578 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
|
7
|
579
|
|
580
|
|
581 interval = intervalFreq
|
|
582 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
|
583 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
584
|
9
|
585 one = triplets[triplets$Sample == "14696_reg_BM",]
|
|
586 two = triplets[triplets$Sample == "24536_reg_BM",]
|
|
587 three = triplets[triplets$Sample == "24062_reg_BM",]
|
8
|
588 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
|
7
|
589
|
9
|
590 one = triplets[triplets$Sample == "16278_Left",]
|
|
591 two = triplets[triplets$Sample == "26402_Left",]
|
|
592 three = triplets[triplets$Sample == "26759_Left",]
|
8
|
593 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
|
7
|
594
|
9
|
595 one = triplets[triplets$Sample == "16278_Right",]
|
|
596 two = triplets[triplets$Sample == "26402_Right",]
|
|
597 three = triplets[triplets$Sample == "26759_Right",]
|
8
|
598 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)
|