annotate RScript.r @ 65:7a63e90cfc3d draft

Uploaded
author davidvanzessen
date Mon, 09 Nov 2015 08:19:53 -0500
parents 39fff0180d41
children ef3ac4f431bb
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
3 inFile = args[1]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
4 outDir = args[2]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
5 logfile = args[3]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
6 min_freq = as.numeric(args[4])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
7 min_cells = as.numeric(args[5])
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
8 mergeOn = args[6]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
9
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
10 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
11
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
12 library(ggplot2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
13 library(reshape2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
14 library(data.table)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
15 library(grid)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
16 library(parallel)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
17 #require(xtable)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
18 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
19 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
20 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Related_to_leukemia_clone", "Clone_Sequence")]
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
21 dat$dsPerM = 0
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
22 dat = dat[!is.na(dat$Patient),]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
23 dat$Related_to_leukemia_clone = F
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
24
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
25 setwd(outDir)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
26 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
27 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
28 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
29
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
30 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
31
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
32 dat$Frequency = ((10^dat$Log10_Frequency)*100)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
33
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
34 dat = dat[dat$Frequency >= min_freq,]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
35
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
36 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
37
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
38 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
39
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
40 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
41 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
42 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
43
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
44 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
45 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
46
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
47 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
48 print(paste("rows:", nrow(dat)))
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
49 dat = merge(dat, min_cell_count, by="min_cell_paste")
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
50 print(paste("rows:", nrow(dat)))
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
51 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
52
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
53 dat = dat[dat$normalized_read_count >= min_cells,]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
54
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
55 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
56
22
b662fdc7eff4 Uploaded
davidvanzessen
parents: 20
diff changeset
57 patients = split(dat, dat$Patient, drop=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
58 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
6
8313c6cc65c5 Uploaded
davidvanzessen
parents: 5
diff changeset
59 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
60 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
61 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
62 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
63 Titles = factor(Titles, levels=Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
64 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
65
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
66 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
67
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
68 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
69 patient.merge.list.second = list()
56
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
70 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
71 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
72 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
73 if (!is.data.frame(x) & is.list(x)){
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
74 x = x[[1]]
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
75 }
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
76 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
77 x = data.frame(x,stringsAsFactors=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
78 onShort = "reads"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
79 if(on == "Frequency"){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
80 onShort = "freq"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
81 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
82 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
83 ony = paste(on, ".y", sep="")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
84 splt = split(x, x$Sample, drop=T)
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
85 type="pair"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
86 if(length(splt) == 1){
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
87 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
88 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
89 type="single"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
90 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
91 patient1 = splt[[1]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
92 patient2 = splt[[2]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
93
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
94 threshholdIndex = which(colnames(product) == "interval")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
95 V_SegmentIndex = which(colnames(product) == "V_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
96 J_SegmentIndex = which(colnames(product) == "J_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
97 titleIndex = which(colnames(product) == "Titles")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
98 sampleIndex = which(colnames(x) == "Sample")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
99 patientIndex = which(colnames(x) == "Patient")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
100 oneSample = paste(patient1[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
101 twoSample = paste(patient2[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
102 patient = paste(x[1,patientIndex])
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
103
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
104 switched = F
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
105 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
106 tmp = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
107 twoSample = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
108 oneSample = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
109 tmp = patient1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
110 patient1 = patient2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
111 patient2 = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
112 switched = T
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
113 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
114 if(appendtxt){
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
115 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
116 }
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
117 cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
118
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
119 if(mergeOn == "Clone_Sequence"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
120 patient1$merge = paste(patient1$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
121 patient2$merge = paste(patient2$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
122 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
123 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
124 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
125 }
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
126
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
127 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
128 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
129 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
130 scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
131 scatterplot_data$on = onShort
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
132
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
133 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy'
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
134 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
135
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
136 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
137
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
138 start.time = proc.time()
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
139 merge.list = c()
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
140
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
141 if(patient %in% names(patient.merge.list)){
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
142 patientMerge = patient.merge.list[[patient]]
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
143 merge.list[["second"]] = patient.merge.list.second[[patient]]
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
144 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
145
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
146 print(names(patient.merge.list))
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
147 } else {
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
148 #fuzzy matching here...
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
149 #merge.list = patientMerge$merge
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
150
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
151 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
152 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
153
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
154 patient1.fuzzy = patient1
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
155 patient2.fuzzy = patient2
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
156
36
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
157 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
158 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
159
48
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
160 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
161 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
162
48
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
163 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
164 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
165
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
166 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove?
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
167 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
168
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
169 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
170 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
171
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
172 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
173 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
174
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
175 merge.list = list()
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
176
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
177 merge.list[["second"]] = vector()
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
178
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
179 while(nrow(patient.fuzzy) > 1){
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
180 first.merge = patient.fuzzy[1,"merge"]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
181 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
182 first.sample = patient.fuzzy[1,"Sample"]
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
183 merge.filter = first.merge == patient.fuzzy$merge
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
184
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
185 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
186
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
187 first.sample.filter = first.sample == patient.fuzzy$Sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
188 second.sample.filter = first.sample != patient.fuzzy$Sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
189
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
190 #first match same sample, sum to a single row, same for other sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
191 #then merge rows like 'normal'
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
192
48
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
193 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
194
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
195
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
196
44
07278582b735 Uploaded
davidvanzessen
parents: 43
diff changeset
197 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
198 first.match.filter = merge.filter & sequence.filter & first.sample.filter
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
199 second.match.filter = merge.filter & sequence.filter & second.sample.filter
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
200
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
201 first.rows = patient.fuzzy[first.match.filter,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
202 second.rows = patient.fuzzy[second.match.filter,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
203
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
204 first.rows.v = table(first.rows$V_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
205 first.rows.v = names(first.rows.v[which.max(first.rows.v)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
206 first.rows.j = table(first.rows$J_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
207 first.rows.j = names(first.rows.j[which.max(first.rows.j)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
208
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
209 first.sum = data.frame(merge = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
210 Patient = patient,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
211 Receptor = first.rows[1,"Receptor"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
212 Sample = first.rows[1,"Sample"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
213 Cell_Count = first.rows[1,"Cell_Count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
214 Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
215 Log10_Frequency = log10(sum(first.rows$Frequency)),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
216 Total_Read_Count = sum(first.rows$Total_Read_Count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
217 dsPerM = sum(first.rows$dsPerM),
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
218 J_Segment_Major_Gene = first.rows.j,
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
219 V_Segment_Major_Gene = first.rows.v,
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
220 Clone_Sequence = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
221 CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
222 Related_to_leukemia_clone = F,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
223 Frequency = sum(first.rows$Frequency),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
224 locus_V = first.rows[1,"locus_V"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
225 locus_J = first.rows[1,"locus_J"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
226 min_cell_count = first.rows[1,"min_cell_count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
227 normalized_read_count = sum(first.rows$normalized_read_count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
228 paste = first.rows[1,"paste"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
229 min_cell_paste = first.rows[1,"min_cell_paste"])
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
230
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
231 if(nrow(second.rows) > 0){
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
232 second.rows.v = table(second.rows$V_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
233 second.rows.v = names(second.rows.v[which.max(second.rows.v)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
234 second.rows.j = table(second.rows$J_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
235 second.rows.j = names(second.rows.j[which.max(second.rows.j)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
236
55
43ce3ebfc9b5 Uploaded
davidvanzessen
parents: 54
diff changeset
237 second.sum = data.frame(merge = first.clone.sequence,
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
238 Patient = patient,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
239 Receptor = second.rows[1,"Receptor"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
240 Sample = second.rows[1,"Sample"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
241 Cell_Count = second.rows[1,"Cell_Count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
242 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
243 Log10_Frequency = log10(sum(second.rows$Frequency)),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
244 Total_Read_Count = sum(second.rows$Total_Read_Count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
245 dsPerM = sum(second.rows$dsPerM),
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
246 J_Segment_Major_Gene = second.rows.j,
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
247 V_Segment_Major_Gene = second.rows.v,
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
248 Clone_Sequence = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
249 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
250 Related_to_leukemia_clone = F,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
251 Frequency = sum(second.rows$Frequency),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
252 locus_V = second.rows[1,"locus_V"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
253 locus_J = second.rows[1,"locus_J"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
254 min_cell_count = second.rows[1,"min_cell_count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
255 normalized_read_count = sum(second.rows$normalized_read_count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
256 paste = second.rows[1,"paste"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
257 min_cell_paste = second.rows[1,"min_cell_paste"])
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
258
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
259 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
260 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
261
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
262 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
263 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
264
56
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
265 tmp.rows = rbind(first.rows, second.rows)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
266 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
267
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
268 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) {
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
269 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
270 } else {
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
271 second.clone.sequence = second.rows[1,"Clone_Sequence"]
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
272 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
273 cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
274 } else {
58
3ed7878f75c3 Uploaded
davidvanzessen
parents: 56
diff changeset
275 #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
276 }
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
277 }
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
278
59
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
279 } else if(nrow(first.rows) > 1) {
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
280 if(patient1[1,"Sample"] == first.sample){
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
281 patient1 = patient1[!(patient1$Clone_Sequence %in% first.rows$Clone_Sequence),]
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
282 patient1 = rbind(patient1, first.sum)
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
283 } else {
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
284 patient2 = patient2[!(patient2$Clone_Sequence %in% first.rows$Clone_Sequence),]
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
285 patient2 = rbind(patient2, first.sum)
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
286 }
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
287
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
288 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"])
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
289 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
290
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
291 patient.fuzzy = patient.fuzzy[-first.match.filter,]
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
292
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
293 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
294 } else {
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
295 patient.fuzzy = patient.fuzzy[-1,]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
296 }
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
297 }
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
298 patient.merge.list[[patient]] <<- patientMerge
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
299 patient.merge.list.second[[patient]] <<- merge.list[["second"]]
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
300 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
301 }
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
302
52
c5c2a790d476 Uploaded
davidvanzessen
parents: 51
diff changeset
303 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
c5c2a790d476 Uploaded
davidvanzessen
parents: 51
diff changeset
304 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
305
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
306
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
307 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
308 res1 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
309 res2 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
310 resBoth = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
311 read1Count = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
312 read2Count = vector()
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
313 locussum1 = vector()
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
314 locussum2 = vector()
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
315
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
316 #for(iter in 1){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
317 for(iter in 1:length(product[,1])){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
318 threshhold = product[iter,threshholdIndex]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
319 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
320 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
321 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
322 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
323 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
324 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
14
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
325 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
326 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
327 res1 = append(res1, sum(one))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
328 res2 = append(res2, sum(two))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
329 resBoth = append(resBoth, sum(both))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
330 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
331 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
332 #threshhold = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
333 if(threshhold != 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
334 if(sum(one) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
335 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
336 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
337 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
338 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
339 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
340 if(sum(two) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
341 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
342 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
343 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
344 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
345 }
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
346 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
347 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
348 #scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[[twoSample]]),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
349 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
350 if(nrow(scatterplot_locus_data) > 0){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
351 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
352 }
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
353 in_one = (scatterplot_locus_data$merge %in% patient1$merge)
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
354 in_two = (scatterplot_locus_data$merge %in% patient2$merge)
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
355 if(any(in_two)){
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
356 scatterplot_locus_data[in_two,]$type = twoSample
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
357 }
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
358 in_both = (scatterplot_locus_data$merge %in% patientMerge$merge)
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
359 #merge.list.filter = (scatterplot_locus_data$merge %in% merge.list[[oneSample]])
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
360 #exact.matches.filter = (scatterplot_locus_data$merge %in% cs.exact.matches)
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
361 if(any(in_both)){
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
362 scatterplot_locus_data[in_both,]$type = "In Both"
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
363 }
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
364 if(type == "single"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
365 single_patients <<- rbind(single_patients, scatterplot_locus_data)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
366 }
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
367 p = NULL
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
368 if(nrow(scatterplot_locus_data) != 0){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
369 if(on == "normalized_read_count"){
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
370 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
371 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=10^6) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
372 } else {
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
373 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
374 }
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
375 p = p + geom_point(aes(colour=type), position="jitter")
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
376 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
377 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
378 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
379 }
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
380 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
381 print(p)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
382 dev.off()
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
383 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
384 if(sum(both) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
385 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
386 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
387 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
388 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
389 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
390 }
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
391 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
392 if(sum(is.na(patientResult$percentage)) > 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
393 patientResult[is.na(patientResult$percentage),]$percentage = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
394 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
395 colnames(patientResult)[6] = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
396 colnames(patientResult)[8] = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
397 colnamesBak = colnames(patientResult)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
398 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
399 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
400 colnames(patientResult) = colnamesBak
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
401
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
402 patientResult$Locus = factor(patientResult$Locus, Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
403 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
404
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
405 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
406 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
407 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
408 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
409 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
410 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
411 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
412 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
413 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
414 #(t,r,b,l)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
415 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
416 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
417 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
418 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
419 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
420 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
421 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
422 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
423 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
424
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
425 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
426 patientResult$relativeValue = patientResult$value * 10
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
427 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
428 plt = ggplot(patientResult)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
429 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
430 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
431 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
432 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
433 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
434 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
435 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
436 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
437 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
438 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
439
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
440 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
441
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
442 interval = intervalFreq
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
443 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
444 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
445 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
446
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
447 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
448
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
449 interval = intervalReads
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
450 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
451 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
452 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
453
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
454 if(nrow(single_patients) > 0){
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
455 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
456 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
457 p = p + geom_point(aes(colour=type), position="jitter")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
458 p = p + xlab("In one or both samples") + ylab("Reads")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
459 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
460 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
461 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
462 dev.off()
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
463
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
464 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
465 p = p + geom_point(aes(colour=type), position="jitter")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
466 p = p + xlab("In one or both samples") + ylab("Frequency")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
467 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
468 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
469 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
470 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
471 } else {
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
472 empty <- data.frame()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
473 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
474
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
475 png("singles_reads_scatterplot.png", width=400, height=300)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
476 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
477 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
478
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
479 png("singles_freq_scatterplot.png", width=400, height=300)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
480 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
481 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
482 }
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
483
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
484 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
485 patient.merge.list.second = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
486
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
487 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
488 onShort = "reads"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
489 if(on == "Frequency"){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
490 onShort = "freq"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
491 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
492 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
493 ony = paste(on, ".y", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
494 onz = paste(on, ".z", sep="")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
495 type="triplet"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
496
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
497 threshholdIndex = which(colnames(product) == "interval")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
498 V_SegmentIndex = which(colnames(product) == "V_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
499 J_SegmentIndex = which(colnames(product) == "J_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
500 titleIndex = which(colnames(product) == "Titles")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
501 sampleIndex = which(colnames(patient1) == "Sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
502 patientIndex = which(colnames(patient1) == "Patient")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
503 oneSample = paste(patient1[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
504 twoSample = paste(patient2[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
505 threeSample = paste(patient3[1,sampleIndex], sep="")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
506
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
507 if(mergeOn == "Clone_Sequence"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
508 patient1$merge = paste(patient1$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
509 patient2$merge = paste(patient2$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
510 patient3$merge = paste(patient3$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
511
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
512 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
513 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
514 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
515 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
516 }
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
517
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
518 #patientMerge = merge(patient1, patient2, by="merge")[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
519 patient1.fuzzy = patient1
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
520 patient2.fuzzy = patient2
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
521 patient3.fuzzy = patient3
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
522
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
523 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
524
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
525 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
526 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
527 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
528
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
529 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy)
65
7a63e90cfc3d Uploaded
davidvanzessen
parents: 64
diff changeset
530 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
531
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
532 other.sample.list = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
533 other.sample.list[[oneSample]] = c(twoSample, threeSample)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
534 other.sample.list[[twoSample]] = c(oneSample, threeSample)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
535 other.sample.list[[threeSample]] = c(oneSample, twoSample)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
536
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
537 patientMerge = merge(patient1, patient2, by="merge")
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
538 patientMerge = merge(patientMerge, patient3, by="merge")
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
539 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
540 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
541 patientMerge = patientMerge[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
542
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
543 duo.merge.list = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
544
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
545 patientMerge12 = merge(patient1, patient2, by="merge")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
546 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
547 patientMerge12 = patientMerge12[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
548 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
549 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
550
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
551 patientMerge13 = merge(patient1, patient3, by="merge")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
552 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
553 patientMerge13 = patientMerge13[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
554 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
555 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
556
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
557 patientMerge23 = merge(patient2, patient3, by="merge")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
558 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
559 patientMerge23 = patientMerge23[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
560 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
561 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
562
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
563 merge.list = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
564 merge.list[["second"]] = vector()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
565
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
566 start.time = proc.time()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
567 if(paste(label1, "123") %in% names(patient.merge.list)){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
568 patientMerge = patient.merge.list[[paste(label1, "123")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
569 patientMerge12 = patient.merge.list[[paste(label1, "12")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
570 patientMerge13 = patient.merge.list[[paste(label1, "13")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
571 patientMerge23 = patient.merge.list[[paste(label1, "23")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
572
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
573 merge.list[["second"]] = patient.merge.list.second[[label1]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
574
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
575 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
576 } else {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
577 while(nrow(patient.fuzzy) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
578 first.merge = patient.fuzzy[1,"merge"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
579 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
580 first.sample = patient.fuzzy[1,"Sample"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
581
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
582 merge.filter = first.merge == patient.fuzzy$merge
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
583
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
584 second.sample = other.sample.list[[first.sample]][1]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
585 third.sample = other.sample.list[[first.sample]][2]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
586
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
587 sample.filter.1 = first.sample == patient.fuzzy$Sample
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
588 sample.filter.2 = second.sample == patient.fuzzy$Sample
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
589 sample.filter.3 = third.sample == patient.fuzzy$Sample
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
590
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
591 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
592
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
593 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
594 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
595 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
596
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
597 matches.in.1 = any(match.filter.1)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
598 matches.in.2 = any(match.filter.2)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
599 matches.in.3 = any(match.filter.3)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
600
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
601
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
602
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
603 rows.1 = patient.fuzzy[match.filter.1,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
604
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
605 sum.1 = data.frame(merge = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
606 Patient = label1,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
607 Receptor = rows.1[1,"Receptor"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
608 Sample = rows.1[1,"Sample"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
609 Cell_Count = rows.1[1,"Cell_Count"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
610 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
611 Log10_Frequency = log10(sum(rows.1$Frequency)),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
612 Total_Read_Count = sum(rows.1$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
613 dsPerM = sum(rows.1$dsPerM),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
614 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
615 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
616 Clone_Sequence = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
617 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
618 Related_to_leukemia_clone = F,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
619 Frequency = sum(rows.1$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
620 locus_V = rows.1[1,"locus_V"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
621 locus_J = rows.1[1,"locus_J"],
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
622 uniqueID = rows.1[1,"uniqueID"],
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
623 normalized_read_count = sum(rows.1$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
624 sum.2 = sum.1[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
625 rows.2 = patient.fuzzy[match.filter.2,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
626 if(matches.in.2){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
627 sum.2 = data.frame(merge = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
628 Patient = label1,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
629 Receptor = rows.2[1,"Receptor"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
630 Sample = rows.2[1,"Sample"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
631 Cell_Count = rows.2[1,"Cell_Count"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
632 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
633 Log10_Frequency = log10(sum(rows.2$Frequency)),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
634 Total_Read_Count = sum(rows.2$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
635 dsPerM = sum(rows.2$dsPerM),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
636 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
637 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
638 Clone_Sequence = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
639 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
640 Related_to_leukemia_clone = F,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
641 Frequency = sum(rows.2$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
642 locus_V = rows.2[1,"locus_V"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
643 locus_J = rows.2[1,"locus_J"],
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
644 uniqueID = rows.2[1,"uniqueID"],
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
645 normalized_read_count = sum(rows.2$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
646 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
647
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
648 sum.3 = sum.1[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
649 rows.3 = patient.fuzzy[match.filter.3,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
650 if(matches.in.3){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
651 sum.3 = data.frame(merge = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
652 Patient = label1,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
653 Receptor = rows.3[1,"Receptor"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
654 Sample = rows.3[1,"Sample"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
655 Cell_Count = rows.3[1,"Cell_Count"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
656 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
657 Log10_Frequency = log10(sum(rows.3$Frequency)),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
658 Total_Read_Count = sum(rows.3$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
659 dsPerM = sum(rows.3$dsPerM),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
660 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
661 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
662 Clone_Sequence = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
663 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
664 Related_to_leukemia_clone = F,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
665 Frequency = sum(rows.3$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
666 locus_V = rows.3[1,"locus_V"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
667 locus_J = rows.3[1,"locus_J"],
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
668 uniqueID = rows.3[1,"uniqueID"],
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
669 normalized_read_count = sum(rows.3$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
670 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
671
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
672 if(matches.in.2 & matches.in.3){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
673 merge.123 = merge(sum.1, sum.2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
674 merge.123 = merge(merge.123, sum.3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
675 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
676 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
677
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
678 patientMerge = rbind(patientMerge, merge.123)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
679 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
680
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
681 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
682 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
683
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
684 } else if (matches.in.2) {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
685 #other.sample1 = other.sample.list[[first.sample]][1]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
686 #other.sample2 = other.sample.list[[first.sample]][2]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
687
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
688 second.sample = sum.2[,"Sample"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
689
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
690 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
691
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
692 merge.12 = merge(sum.1, sum.2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
693
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
694 current.merge.list = rbind(current.merge.list, merge.12)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
695 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
696
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
697 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
698
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
699 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
700 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
701
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
702 } else if (matches.in.3) {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
703
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
704 #other.sample1 = other.sample.list[[first.sample]][1]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
705 #other.sample2 = other.sample.list[[first.sample]][2]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
706
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
707 second.sample = sum.3[,"Sample"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
708
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
709 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
710
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
711 merge.13 = merge(sum.1, sum.3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
712
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
713 current.merge.list = rbind(current.merge.list, merge.13)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
714 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
715
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
716 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
717
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
718 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
719 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
720
63
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
721 } else if(nrow(rows.1) > 1){
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
722 patient1 = patient1[!(patient1$Clone_Sequence %in% rows.1$Clone_Sequence),]
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
723 print(names(patient1)[names(patient1) %in% sum.1])
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
724 print(names(patient1)[!(names(patient1) %in% sum.1)])
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
725 print(names(patient1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
726 print(names(sum.1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
727 print(summary(sum.1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
728 print(summary(patient1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
729 print(dim(sum.1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
730 print(dim(patient1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
731 print(head(sum.1[,names(patient1)]))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
732 patient1 = rbind(patient1, sum.1[,names(patient1)])
63
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
733 patient.fuzzy = patient.fuzzy[-match.filter.1,]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
734 } else {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
735 patient.fuzzy = patient.fuzzy[-1,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
736 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
737
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
738 tmp.rows = rbind(rows.1, rows.2, rows.3)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
739 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
740
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
741 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
742 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
743 } else {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
744 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
745
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
746 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
747 patient.merge.list[[paste(label1, "123")]] = patientMerge
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
748
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
749 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
750 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
751 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
752
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
753 patient.merge.list[[paste(label1, "12")]] = patientMerge12
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
754 patient.merge.list[[paste(label1, "13")]] = patientMerge13
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
755 patient.merge.list[[paste(label1, "23")]] = patientMerge23
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
756
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
757 patient.merge.list.second[[label1]] = merge.list[["second"]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
758 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
759 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
760 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
761 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
762 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
763 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
764
63
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
765 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),]
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
766 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),]
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
767 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),]
62
b1ad6f515338 Uploaded
davidvanzessen
parents: 61
diff changeset
768
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
769 if(F){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
770 patientMerge = merge(patient1, patient2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
771 patientMerge = merge(patientMerge, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
772 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
773 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
774 patientMerge12 = merge(patient1, patient2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
775 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
776 patientMerge13 = merge(patient1, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
777 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
778 patientMerge23 = merge(patient2, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
779 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
780 }
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
781
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
782 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
783 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
784 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
785 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
786
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
787 res1 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
788 res2 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
789 res3 = vector()
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
790 res12 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
791 res13 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
792 res23 = vector()
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
793 resAll = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
794 read1Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
795 read2Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
796 read3Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
797
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
798 if(appendTriplets){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
799 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
800 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
801 for(iter in 1:length(product[,1])){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
802 threshhold = product[iter,threshholdIndex]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
803 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
804 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
805 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
806 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
807
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
808 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
809 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
810 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
811
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
812 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
813 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
814 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
815
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
816 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
817 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
818 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
819 res1 = append(res1, sum(one))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
820 res2 = append(res2, sum(two))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
821 res3 = append(res3, sum(three))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
822 resAll = append(resAll, sum(all))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
823 res12 = append(res12, sum(one_two))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
824 res13 = append(res13, sum(one_three))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
825 res23 = append(res23, sum(two_three))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
826 #threshhold = 0
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
827 if(threshhold != 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
828 if(sum(one) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
829 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
830 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
831 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
832 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
833 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
834 if(sum(two) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
835 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
836 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
837 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
838 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
839 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
840 if(sum(three) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
841 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
842 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
843 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
844 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
845 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
846 if(sum(one_two) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
847 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
848 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
849 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
850 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
851 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
852 if(sum(one_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
853 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
854 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
855 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
856 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
857 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
858 if(sum(two_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
859 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
860 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
861 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
862 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
863 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
864 } else { #scatterplot data
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
865 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
866 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
867 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
868 if(sum(in_two) > 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
869 scatterplot_locus_data[in_two,]$type = "In two"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
870 }
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
871 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
872 if(sum(in_three)> 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
873 scatterplot_locus_data[in_three,]$type = "In three"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
874 }
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
875 not_in_one = scatterplot_locus_data$type != "In one"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
876 if(sum(not_in_one) > 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
877 scatterplot_locus_data[not_in_one,]$type = "In multiple"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
878 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
879 p = NULL
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
880 if(nrow(scatterplot_locus_data) != 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
881 if(on == "normalized_read_count"){
31
ce8bd23d0335 Uploaded
davidvanzessen
parents: 30
diff changeset
882 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
32
dde5ec847549 Uploaded
davidvanzessen
parents: 31
diff changeset
883 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
884 } else {
32
dde5ec847549 Uploaded
davidvanzessen
parents: 31
diff changeset
885 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
886 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
887 p = p + geom_point(aes(colour=type), position="jitter")
25
99020e5ce46c Uploaded
davidvanzessen
parents: 24
diff changeset
888 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
889 } else {
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
890 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
891 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
892 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
893 print(p)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
894 dev.off()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
895 }
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
896 if(sum(all) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
897 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
898 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
899 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
900 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
901 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
902 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
903 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
904 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
905 colnames(patientResult)[6] = oneSample
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
906 colnames(patientResult)[7] = twoSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
907 colnames(patientResult)[8] = threeSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
908 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
909 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
910 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
911
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
912 colnamesBak = colnames(patientResult)
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
913 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
914 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
915 colnames(patientResult) = colnamesBak
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
916
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
917 patientResult$Locus = factor(patientResult$Locus, Titles)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
918 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
919
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
920 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
921 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
922 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
923 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
924 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
925 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
926 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
927 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
928 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
929
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
930 fontSize = 4
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
931
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
932 bak = patientResult
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
933 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
934 patientResult$relativeValue = patientResult$value * 10
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
935 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
936 plt = ggplot(patientResult)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
937 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
938 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
939 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
940 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
941 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
942 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
943 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
944 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
945 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
946 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
947 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
948
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
949 if(nrow(triplets) != 0){
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
950
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
951 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
952
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
953 triplets$uniqueID = "ID"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
954
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
955 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
956 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
957 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
958
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
959 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
960 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
961 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
962
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
963 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
964
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
965 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
966
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
967 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
968 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
969 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
970
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
971 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
972 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
973
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
974 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
975
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
976 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
977
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
978 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
979
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
980 triplets = triplets[triplets$normalized_read_count >= min_cells,]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
981
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
982 column_drops = c("min_cell_count", "min_cell_paste")
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
983
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
984 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
985
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
986 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
987
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
988 interval = intervalReads
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
989 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
990 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
991
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
992 one = triplets[triplets$Sample == "14696_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
993 two = triplets[triplets$Sample == "24536_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
994 three = triplets[triplets$Sample == "24062_reg_BM",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
995 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
996
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
997 one = triplets[triplets$Sample == "16278_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
998 two = triplets[triplets$Sample == "26402_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
999 three = triplets[triplets$Sample == "26759_Left",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1000 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1001
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1002 one = triplets[triplets$Sample == "16278_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1003 two = triplets[triplets$Sample == "26402_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1004 three = triplets[triplets$Sample == "26759_Right",]
53
8ebe57feecd6 Uploaded
davidvanzessen
parents: 52
diff changeset
1005 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1006
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1007 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1008
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1009 interval = intervalFreq
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1010 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1011 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1012
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1013 one = triplets[triplets$Sample == "14696_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1014 two = triplets[triplets$Sample == "24536_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1015 three = triplets[triplets$Sample == "24062_reg_BM",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1016 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1017
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1018 one = triplets[triplets$Sample == "16278_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1019 two = triplets[triplets$Sample == "26402_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1020 three = triplets[triplets$Sample == "26759_Left",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1021 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1022
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1023 one = triplets[triplets$Sample == "16278_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1024 two = triplets[triplets$Sample == "26402_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1025 three = triplets[triplets$Sample == "26759_Right",]
53
8ebe57feecd6 Uploaded
davidvanzessen
parents: 52
diff changeset
1026 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1027 } else {
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1028 cat("", file="triplets.txt")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1029 }
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1030 cat("</table></html>", file=logfile, append=T)