0
|
1 args <- commandArgs(trailingOnly = TRUE)
|
|
2
|
|
3 inFile = args[1]
|
|
4 outDir = args[2]
|
3
|
5 logfile = args[3]
|
|
6 min_freq = as.numeric(args[4])
|
|
7 min_cells = as.numeric(args[5])
|
29
|
8 mergeOn = args[6]
|
3
|
9
|
|
10 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
|
0
|
11
|
4
|
12 library(ggplot2)
|
|
13 library(reshape2)
|
|
14 library(data.table)
|
|
15 library(grid)
|
|
16 library(parallel)
|
0
|
17 #require(xtable)
|
3
|
18 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
|
13
|
19 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
|
9
|
20 dat = dat[!is.na(dat$Patient),]
|
13
|
21 dat$Related_to_leukemia_clone = F
|
9
|
22
|
0
|
23 setwd(outDir)
|
3
|
24 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
|
2
|
25 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
|
|
26 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
|
|
27
|
3
|
28 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
|
12
|
29
|
13
|
30 dat$Frequency = ((10^dat$Log10_Frequency)*100)
|
2
|
31
|
3
|
32 dat = dat[dat$Frequency >= min_freq,]
|
|
33
|
13
|
34 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
|
|
35
|
|
36 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
|
|
37
|
|
38 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
|
|
39 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
|
|
40 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
|
|
41
|
|
42 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
|
|
43 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
|
|
44
|
|
45 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
|
|
46
|
|
47 dat = merge(dat, min_cell_count, by="min_cell_paste")
|
|
48
|
|
49 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
|
|
50
|
3
|
51 dat = dat[dat$normalized_read_count >= min_cells,]
|
13
|
52
|
|
53 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
|
9
|
54
|
29
|
55 cat("<tr><td>Adding duplicate V+J+CDR3 sequences</td></tr>", file=logfile, append=T)
|
|
56 #remove duplicate V+J+CDR3, add together numerical values
|
|
57 dat= data.frame(data.table(dat)[, list(Receptor=unique(.SD$Receptor),
|
|
58 Cell_Count=unique(.SD$Cell_Count),
|
|
59 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
|
|
60 Total_Read_Count=sum(.SD$Total_Read_Count),
|
|
61 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
|
|
62 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
|
|
63 Frequency=sum(.SD$Frequency),
|
|
64 locus_V=unique(.SD$locus_V),
|
|
65 locus_J=unique(.SD$locus_J),
|
|
66 min_cell_count=unique(.SD$min_cell_count),
|
|
67 normalized_read_count=sum(.SD$normalized_read_count),
|
|
68 Log10_Frequency=sum(.SD$Log10_Frequency),
|
|
69 Clone_Sequence=.SD$Clone_Sequence[1],
|
|
70 min_cell_paste=.SD$min_cell_paste[1],
|
|
71 paste=unique(.SD$paste)), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
|
|
72
|
|
73
|
22
|
74 patients = split(dat, dat$Patient, drop=T)
|
9
|
75 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
|
6
|
76 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
|
0
|
77 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
|
|
78 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
|
|
79 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
|
|
80 Titles = factor(Titles, levels=Titles)
|
|
81 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
|
|
82
|
29
|
83 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
|
|
84
|
0
|
85 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
|
28
|
86 if (!is.data.frame(x) & is.list(x)){
|
|
87 x = x[[1]]
|
|
88 }
|
0
|
89 x$Sample = factor(x$Sample, levels=unique(x$Sample))
|
|
90 onShort = "reads"
|
|
91 if(on == "Frequency"){
|
|
92 onShort = "freq"
|
|
93 }
|
18
|
94 onx = paste(on, ".x", sep="")
|
|
95 ony = paste(on, ".y", sep="")
|
0
|
96 splt = split(x, x$Sample, drop=T)
|
4
|
97 type="pair"
|
2
|
98 if(length(splt) == 1){
|
3
|
99 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
|
4
|
100 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
|
|
101 type="single"
|
2
|
102 }
|
0
|
103 patient1 = splt[[1]]
|
|
104 patient2 = splt[[2]]
|
|
105
|
|
106 threshholdIndex = which(colnames(product) == "interval")
|
|
107 V_SegmentIndex = which(colnames(product) == "V_Segments")
|
|
108 J_SegmentIndex = which(colnames(product) == "J_Segments")
|
|
109 titleIndex = which(colnames(product) == "Titles")
|
|
110 sampleIndex = which(colnames(x) == "Sample")
|
|
111 patientIndex = which(colnames(x) == "Patient")
|
|
112 oneSample = paste(patient1[1,sampleIndex], sep="")
|
|
113 twoSample = paste(patient2[1,sampleIndex], sep="")
|
|
114 patient = paste(x[1,patientIndex])
|
3
|
115
|
0
|
116 switched = F
|
|
117 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
|
|
118 tmp = twoSample
|
|
119 twoSample = oneSample
|
|
120 oneSample = tmp
|
|
121 tmp = patient1
|
|
122 patient1 = patient2
|
|
123 patient2 = tmp
|
|
124 switched = T
|
|
125 }
|
|
126 if(appendtxt){
|
4
|
127 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
|
0
|
128 }
|
3
|
129 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
|
9
|
130
|
29
|
131 if(mergeOn == "Clone_Sequence"){
|
|
132 patient1$merge = paste(patient1$Clone_Sequence)
|
|
133 patient2$merge = paste(patient2$Clone_Sequence)
|
|
134 } else {
|
|
135 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
|
|
136 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
|
|
137 }
|
9
|
138
|
29
|
139 scatterplot_data_columns = c("Patient", "Sample", "Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene")
|
|
140 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
|
|
141 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$Clone_Sequence),]
|
|
142 scatterplot_data$type = factor(x="In one", levels=c("In one", "In Both"))
|
|
143 scatterplot_data$on = onShort
|
|
144
|
9
|
145 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
|
18
|
146 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
|
0
|
147 res1 = vector()
|
|
148 res2 = vector()
|
|
149 resBoth = vector()
|
|
150 read1Count = vector()
|
|
151 read2Count = vector()
|
2
|
152 locussum1 = vector()
|
|
153 locussum2 = vector()
|
9
|
154
|
0
|
155 #for(iter in 1){
|
|
156 for(iter in 1:length(product[,1])){
|
|
157 threshhold = product[iter,threshholdIndex]
|
|
158 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
|
|
159 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
|
18
|
160 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
|
29
|
161 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
|
19
|
162 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$merge))
|
|
163 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$merge))
|
14
|
164 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
|
|
165 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
|
0
|
166 res1 = append(res1, sum(one))
|
2
|
167 res2 = append(res2, sum(two))
|
0
|
168 resBoth = append(resBoth, sum(both))
|
2
|
169 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
|
|
170 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
|
0
|
171 #threshhold = 0
|
|
172 if(threshhold != 0){
|
|
173 if(sum(one) > 0){
|
15
|
174 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
175 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
|
0
|
176 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
177 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
178 }
|
|
179 if(sum(two) > 0){
|
15
|
180 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
181 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
|
0
|
182 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
183 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
184 }
|
29
|
185 } else {
|
|
186 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
|
|
187 if(nrow(scatterplot_locus_data) > 0){
|
|
188 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
|
|
189 }
|
|
190 in_two = (scatterplot_locus_data$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x)
|
|
191 if(any(in_two)){
|
|
192 scatterplot_locus_data[in_two,]$type = "In Both"
|
|
193 }
|
|
194 if(type == "single"){
|
|
195 single_patients <<- rbind(single_patients, scatterplot_locus_data)
|
|
196 }
|
|
197 p = NULL
|
|
198 if(nrow(scatterplot_locus_data) != 0){
|
|
199 if(on == "normalized_read_count"){
|
|
200 scales = 10^(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
|
201 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales)
|
|
202 } else {
|
|
203 p = ggplot(scatterplot_locus_data, aes(type, Frequency))
|
|
204 }
|
|
205 p = p + geom_point(aes(colour=type), position="jitter")
|
|
206 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
|
|
207 } else {
|
|
208 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
|
|
209 }
|
|
210 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
|
|
211 print(p)
|
|
212 dev.off()
|
0
|
213 }
|
|
214 if(sum(both) > 0){
|
15
|
215 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
216 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
|
0
|
217 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
218 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
29
|
219 }
|
0
|
220 }
|
2
|
221 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
|
0
|
222 if(sum(is.na(patientResult$percentage)) > 0){
|
|
223 patientResult[is.na(patientResult$percentage),]$percentage = 0
|
|
224 }
|
|
225 colnames(patientResult)[6] = oneSample
|
|
226 colnames(patientResult)[8] = twoSample
|
|
227 colnamesBak = colnames(patientResult)
|
2
|
228 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
|
0
|
229 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
230 colnames(patientResult) = colnamesBak
|
|
231
|
|
232 patientResult$Locus = factor(patientResult$Locus, Titles)
|
|
233 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
|
|
234
|
|
235 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
|
|
236 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
|
|
237 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
238 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
|
|
239 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
|
|
240 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
241 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
|
|
242 print(plt)
|
|
243 dev.off()
|
|
244 #(t,r,b,l)
|
|
245 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
|
|
246 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
|
|
247 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
248 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
|
|
249 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
|
|
250 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
251 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
|
|
252 print(plt)
|
|
253 dev.off()
|
|
254
|
|
255 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
|
|
256 patientResult$relativeValue = patientResult$value * 10
|
|
257 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
|
|
258 plt = ggplot(patientResult)
|
|
259 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
|
|
260 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
261 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
|
|
262 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
|
|
263 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
|
|
264 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
|
|
265 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
|
|
266 print(plt)
|
|
267 dev.off()
|
|
268 }
|
|
269
|
3
|
270 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
|
|
271
|
0
|
272 interval = intervalFreq
|
|
273 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
4
|
274 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
29
|
275 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
|
0
|
276
|
3
|
277 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
|
|
278
|
0
|
279 interval = intervalReads
|
|
280 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
4
|
281 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
29
|
282 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
|
0
|
283
|
3
|
284 cat("</table></html>", file=logfile, append=T)
|
|
285
|
29
|
286 scales = 10^(0:ceiling(log10(max(single_patients$normalized_read_count))))
|
|
287 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales)
|
|
288 p = p + geom_point(aes(colour=type), position="jitter")
|
|
289 p = p + xlab("In one or both samples") + ylab("Reads")
|
|
290 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
|
|
291 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080)
|
|
292 print(p)
|
|
293 dev.off()
|
7
|
294
|
29
|
295 p = ggplot(single_patients, aes(Rearrangement, Frequency))
|
|
296 p = p + geom_point(aes(colour=type), position="jitter")
|
|
297 p = p + xlab("In one or both samples") + ylab("Frequency")
|
|
298 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
|
|
299 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)), height=1080)
|
|
300 print(p)
|
|
301 dev.off()
|
13
|
302
|
7
|
303 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
|
|
304 onShort = "reads"
|
|
305 if(on == "Frequency"){
|
|
306 onShort = "freq"
|
|
307 }
|
18
|
308 onx = paste(on, ".x", sep="")
|
|
309 ony = paste(on, ".y", sep="")
|
|
310 onz = paste(on, ".z", sep="")
|
7
|
311 type="triplet"
|
|
312
|
|
313 threshholdIndex = which(colnames(product) == "interval")
|
|
314 V_SegmentIndex = which(colnames(product) == "V_Segments")
|
|
315 J_SegmentIndex = which(colnames(product) == "J_Segments")
|
|
316 titleIndex = which(colnames(product) == "Titles")
|
|
317 sampleIndex = which(colnames(patient1) == "Sample")
|
|
318 patientIndex = which(colnames(patient1) == "Patient")
|
|
319 oneSample = paste(patient1[1,sampleIndex], sep="")
|
|
320 twoSample = paste(patient2[1,sampleIndex], sep="")
|
|
321 threeSample = paste(patient3[1,sampleIndex], sep="")
|
|
322
|
29
|
323 if(mergeOn == "Clone_Sequence"){
|
|
324 patient1$merge = paste(patient1$Clone_Sequence)
|
|
325 patient2$merge = paste(patient2$Clone_Sequence)
|
|
326 patient3$merge = paste(patient3$Clone_Sequence)
|
|
327
|
|
328 } else {
|
|
329 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
|
|
330 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
|
|
331 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
|
|
332 }
|
9
|
333
|
|
334 patientMerge = merge(patient1, patient2, by="merge")
|
|
335 patientMerge = merge(patientMerge, patient3, by="merge")
|
28
|
336 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
|
18
|
337 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
|
20
|
338 patientMerge12 = merge(patient1, patient2, by="merge")
|
|
339 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
|
|
340 patientMerge13 = merge(patient1, patient3, by="merge")
|
|
341 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
|
|
342 patientMerge23 = merge(patient2, patient3, by="merge")
|
|
343 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
|
|
344
|
28
|
345
|
24
|
346 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene")
|
20
|
347 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
|
|
348 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$Clone_Sequence),]
|
27
|
349 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
|
20
|
350
|
7
|
351 res1 = vector()
|
|
352 res2 = vector()
|
|
353 res3 = vector()
|
20
|
354 res12 = vector()
|
|
355 res13 = vector()
|
|
356 res23 = vector()
|
7
|
357 resAll = vector()
|
|
358 read1Count = vector()
|
|
359 read2Count = vector()
|
|
360 read3Count = vector()
|
|
361
|
|
362 if(appendTriplets){
|
9
|
363 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
|
7
|
364 }
|
|
365 for(iter in 1:length(product[,1])){
|
|
366 threshhold = product[iter,threshholdIndex]
|
|
367 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
|
|
368 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
|
18
|
369 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
|
|
370 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
|
7
|
371
|
25
|
372 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$Clone_Sequence.x %in% patientMerge[all,]$merge))
|
|
373 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$Clone_Sequence.x %in% patientMerge[all,]$merge))
|
|
374 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$Clone_Sequence.x %in% patientMerge[all,]$merge))
|
24
|
375
|
25
|
376 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient1$Clone_Sequence %in% patientMerge12[one_two,]$merge) & !(patient1$Clone_Sequence %in% patientMerge13[one_three,]$merge))
|
|
377 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient2$Clone_Sequence %in% patientMerge12[one_two,]$merge) & !(patient2$Clone_Sequence %in% patientMerge23[two_three,]$merge))
|
|
378 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient3$Clone_Sequence %in% patientMerge13[one_three,]$merge) & !(patient3$Clone_Sequence %in% patientMerge23[two_three,]$merge))
|
20
|
379
|
18
|
380 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
|
|
381 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
|
|
382 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
|
7
|
383 res1 = append(res1, sum(one))
|
|
384 res2 = append(res2, sum(two))
|
|
385 res3 = append(res3, sum(three))
|
|
386 resAll = append(resAll, sum(all))
|
20
|
387 res12 = append(res12, sum(one_two))
|
|
388 res13 = append(res13, sum(one_three))
|
|
389 res23 = append(res23, sum(two_three))
|
7
|
390 #threshhold = 0
|
|
391 if(threshhold != 0){
|
|
392 if(sum(one) > 0){
|
20
|
393 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
394 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
395 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
396 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
397 }
|
|
398 if(sum(two) > 0){
|
20
|
399 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
400 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
401 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
402 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
403 }
|
|
404 if(sum(three) > 0){
|
20
|
405 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
|
|
406 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
|
7
|
407 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
408 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
409 }
|
20
|
410 if(sum(one_two) > 0){
|
|
411 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
412 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
|
|
413 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
414 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
415 }
|
|
416 if(sum(one_three) > 0){
|
|
417 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
418 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
|
419 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
420 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
421 }
|
|
422 if(sum(two_three) > 0){
|
|
423 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
|
|
424 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
|
425 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
|
|
426 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
427 }
|
|
428 } else { #scatterplot data
|
24
|
429 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
|
27
|
430 in_two = (scatterplot_locus_data$Clone_Sequence %in% patientMerge12[one_two,]$Clone_Sequence.x) | (scatterplot_locus_data$Clone_Sequence %in% patientMerge13[one_three,]$Clone_Sequence.x) | (scatterplot_locus_data$Clone_Sequence %in% patientMerge23[two_three,]$Clone_Sequence.x)
|
|
431 if(sum(in_two) > 0){
|
|
432 scatterplot_locus_data[in_two,]$type = "In two"
|
|
433 }
|
|
434 in_three = (scatterplot_locus_data$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)
|
|
435 if(sum(in_three)> 0){
|
|
436 scatterplot_locus_data[in_three,]$type = "In three"
|
|
437 }
|
|
438 not_in_one = scatterplot_locus_data$type != "In one"
|
|
439 if(sum(not_in_one) > 0){
|
|
440 scatterplot_locus_data[not_in_one,]$type = "In multiple"
|
|
441 }
|
20
|
442 p = NULL
|
|
443 if(nrow(scatterplot_locus_data) != 0){
|
|
444 if(on == "normalized_read_count"){
|
|
445 scales = 10^(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
|
|
446 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales)
|
|
447 } else {
|
|
448 p = ggplot(scatterplot_locus_data, aes(type, Frequency))
|
|
449 }
|
|
450 p = p + geom_point(aes(colour=type), position="jitter")
|
25
|
451 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
|
20
|
452 } else {
|
|
453 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
|
|
454 }
|
|
455 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
|
|
456 print(p)
|
|
457 dev.off()
|
|
458 }
|
7
|
459 if(sum(all) > 0){
|
20
|
460 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
|
|
461 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
|
7
|
462 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
|
|
463 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
464 }
|
|
465 }
|
20
|
466 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
|
|
467 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
|
7
|
468 colnames(patientResult)[6] = oneSample
|
20
|
469 colnames(patientResult)[7] = twoSample
|
|
470 colnames(patientResult)[8] = threeSample
|
|
471 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
|
|
472 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
|
|
473 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
|
7
|
474
|
|
475 colnamesBak = colnames(patientResult)
|
20
|
476 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
|
7
|
477 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
|
|
478 colnames(patientResult) = colnamesBak
|
|
479
|
|
480 patientResult$Locus = factor(patientResult$Locus, Titles)
|
|
481 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
|
|
482
|
|
483 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
|
|
484 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
|
|
485 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
486 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
|
|
487 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
|
|
488 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
|
|
489 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
|
|
490 print(plt)
|
|
491 dev.off()
|
|
492
|
|
493 fontSize = 4
|
|
494
|
|
495 bak = patientResult
|
|
496 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
|
|
497 patientResult$relativeValue = patientResult$value * 10
|
|
498 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
|
|
499 plt = ggplot(patientResult)
|
|
500 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
|
|
501 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
|
|
502 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
|
|
503 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
|
|
504 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
|
|
505 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
|
|
506 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
|
|
507 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
|
|
508 print(plt)
|
|
509 dev.off()
|
|
510 }
|
|
511
|
9
|
512 triplets$uniqueID = "ID"
|
|
513
|
|
514 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
515 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
516 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
|
|
517
|
|
518 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
519 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
520 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
|
|
521
|
|
522 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
|
|
523
|
13
|
524 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
|
|
525 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
|
|
526 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
|
|
527
|
|
528 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
|
|
529 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
|
|
530
|
|
531 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
|
9
|
532
|
13
|
533 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
|
|
534
|
|
535 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
|
|
536
|
|
537 triplets = triplets[triplets$normalized_read_count >= min_cells,]
|
|
538
|
|
539 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
|
|
540
|
|
541 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
|
9
|
542
|
29
|
543 #remove duplicate V+J+CDR3, add together numerical values
|
|
544 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor),
|
|
545 Cell_Count=unique(.SD$Cell_Count),
|
|
546 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
|
|
547 Total_Read_Count=sum(.SD$Total_Read_Count),
|
|
548 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
|
|
549 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
|
|
550 Frequency=sum(.SD$Frequency),
|
|
551 normalized_read_count=sum(.SD$normalized_read_count),
|
|
552 Log10_Frequency=sum(.SD$Log10_Frequency),
|
|
553 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
|
|
554
|
|
555
|
7
|
556 interval = intervalReads
|
|
557 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
|
558 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
559
|
9
|
560 one = triplets[triplets$Sample == "14696_reg_BM",]
|
|
561 two = triplets[triplets$Sample == "24536_reg_BM",]
|
|
562 three = triplets[triplets$Sample == "24062_reg_BM",]
|
8
|
563 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
|
7
|
564
|
9
|
565 one = triplets[triplets$Sample == "16278_Left",]
|
|
566 two = triplets[triplets$Sample == "26402_Left",]
|
|
567 three = triplets[triplets$Sample == "26759_Left",]
|
8
|
568 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
|
7
|
569
|
9
|
570 one = triplets[triplets$Sample == "16278_Right",]
|
|
571 two = triplets[triplets$Sample == "26402_Right",]
|
|
572 three = triplets[triplets$Sample == "26759_Right",]
|
8
|
573 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
|
7
|
574
|
|
575
|
|
576 interval = intervalFreq
|
|
577 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
|
|
578 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
|
|
579
|
9
|
580 one = triplets[triplets$Sample == "14696_reg_BM",]
|
|
581 two = triplets[triplets$Sample == "24536_reg_BM",]
|
|
582 three = triplets[triplets$Sample == "24062_reg_BM",]
|
8
|
583 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
|
7
|
584
|
9
|
585 one = triplets[triplets$Sample == "16278_Left",]
|
|
586 two = triplets[triplets$Sample == "26402_Left",]
|
|
587 three = triplets[triplets$Sample == "26759_Left",]
|
8
|
588 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
|
7
|
589
|
9
|
590 one = triplets[triplets$Sample == "16278_Right",]
|
|
591 two = triplets[triplets$Sample == "26402_Right",]
|
|
592 three = triplets[triplets$Sample == "26759_Right",]
|
8
|
593 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)
|