annotate RScript.r @ 69:c532b3f8dc97 draft

Uploaded
author davidvanzessen
date Wed, 06 Jan 2016 11:00:00 -0500
parents ef13f0a3f4d6
children 9643b1fd9c45
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
2 options(scipen=999)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
3
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
4 inFile = args[1]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
5 outDir = args[2]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
6 logfile = args[3]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
7 min_freq = as.numeric(args[4])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
8 min_cells = as.numeric(args[5])
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
9 mergeOn = args[6]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
10
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
11 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
12
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
13 library(ggplot2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
14 library(reshape2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
15 library(data.table)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
16 library(grid)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
17 library(parallel)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
18 #require(xtable)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
19 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
20 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
66
ef3ac4f431bb Uploaded
davidvanzessen
parents: 65
diff changeset
21 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Clone_Sequence")]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
22 dat$dsPerM = 0
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
23 dat = dat[!is.na(dat$Patient),]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
24 dat$Related_to_leukemia_clone = F
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
25
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
26 setwd(outDir)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
27 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
28 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
29 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
30
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
31 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
32
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
33 dat$Frequency = ((10^dat$Log10_Frequency)*100)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
34
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
35 dat = dat[dat$Frequency >= min_freq,]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
36
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
37 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
38
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
39 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
40
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
41 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
42 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
43 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
44
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
45 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
46 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
47
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
48 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
49 print(paste("rows:", nrow(dat)))
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
50 dat = merge(dat, min_cell_count, by="min_cell_paste")
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
51 print(paste("rows:", nrow(dat)))
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
52 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
53
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
54 dat = dat[dat$normalized_read_count >= min_cells,]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
55
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
56 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
57
22
b662fdc7eff4 Uploaded
davidvanzessen
parents: 20
diff changeset
58 patients = split(dat, dat$Patient, drop=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
59 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
6
8313c6cc65c5 Uploaded
davidvanzessen
parents: 5
diff changeset
60 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
61 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
62 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
63 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
64 Titles = factor(Titles, levels=Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
65 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
66
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
67 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
68
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
69 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
70 patient.merge.list.second = list()
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
71 scatter_locus_data_list = list()
56
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
72 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
73 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
74 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
75 if (!is.data.frame(x) & is.list(x)){
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
76 x = x[[1]]
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
77 }
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
78 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
79 x = data.frame(x,stringsAsFactors=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
80 onShort = "reads"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
81 if(on == "Frequency"){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
82 onShort = "freq"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
83 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
84 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
85 ony = paste(on, ".y", sep="")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
86 splt = split(x, x$Sample, drop=T)
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
87 type="pair"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
88 if(length(splt) == 1){
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
89 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
90 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
91 type="single"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
92 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
93 patient1 = splt[[1]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
94 patient2 = splt[[2]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
95
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
96 threshholdIndex = which(colnames(product) == "interval")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
97 V_SegmentIndex = which(colnames(product) == "V_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
98 J_SegmentIndex = which(colnames(product) == "J_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
99 titleIndex = which(colnames(product) == "Titles")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
100 sampleIndex = which(colnames(x) == "Sample")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
101 patientIndex = which(colnames(x) == "Patient")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
102 oneSample = paste(patient1[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
103 twoSample = paste(patient2[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
104 patient = paste(x[1,patientIndex])
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
105
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
106 switched = F
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
107 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
108 tmp = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
109 twoSample = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
110 oneSample = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
111 tmp = patient1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
112 patient1 = patient2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
113 patient2 = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
114 switched = T
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
115 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
116 if(appendtxt){
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
117 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
118 }
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
119 cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
120
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
121 if(mergeOn == "Clone_Sequence"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
122 patient1$merge = paste(patient1$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
123 patient2$merge = paste(patient2$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
124 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
125 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
126 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
127 }
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
128
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
129 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
130 #scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
131 scatterplot_data = patient1[NULL,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
132 #scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
133 #scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
134 scatterplot.data.type.factor = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both"))
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
135 #scatterplot_data$type = factor(x=NULL, levels=scatterplot.data.type.factor)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
136 scatterplot_data$type = character(0)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
137 scatterplot_data$link = numeric(0)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
138 scatterplot_data$on = character(0)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
139
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
140 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy'
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
141 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
142
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
143 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
144
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
145 start.time = proc.time()
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
146 merge.list = c()
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
147
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
148 if(patient %in% names(patient.merge.list)){
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
149 patientMerge = patient.merge.list[[patient]]
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
150 merge.list[["second"]] = patient.merge.list.second[[patient]]
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
151 scatterplot_data = scatter_locus_data_list[[patient]]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
152 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
153
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
154 print(names(patient.merge.list))
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
155 } else {
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
156 #fuzzy matching here...
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
157 #merge.list = patientMerge$merge
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
158
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
159 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
160 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
161
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
162 patient1.fuzzy = patient1
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
163 patient2.fuzzy = patient2
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
164
36
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
165 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
166 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
167
48
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
168 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
169 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
170
48
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
171 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
172 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
173
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
174 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove?
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
175 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
176
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
177 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
178 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
179
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
180 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
181 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
182
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
183 merge.list = list()
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
184
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
185 merge.list[["second"]] = vector()
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
186
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
187 link.count = 1
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
188
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
189 while(nrow(patient.fuzzy) > 1){
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
190 first.merge = patient.fuzzy[1,"merge"]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
191 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
192 first.sample = patient.fuzzy[1,"Sample"]
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
193 merge.filter = first.merge == patient.fuzzy$merge
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
194
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
195 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
196
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
197 first.sample.filter = first.sample == patient.fuzzy$Sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
198 second.sample.filter = first.sample != patient.fuzzy$Sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
199
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
200 #first match same sample, sum to a single row, same for other sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
201 #then merge rows like 'normal'
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
202
48
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
203 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
204
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
205
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
206
44
07278582b735 Uploaded
davidvanzessen
parents: 43
diff changeset
207 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
208 first.match.filter = merge.filter & sequence.filter & first.sample.filter
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
209 second.match.filter = merge.filter & sequence.filter & second.sample.filter
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
210
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
211 first.rows = patient.fuzzy[first.match.filter,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
212 second.rows = patient.fuzzy[second.match.filter,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
213
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
214 first.rows.v = table(first.rows$V_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
215 first.rows.v = names(first.rows.v[which.max(first.rows.v)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
216 first.rows.j = table(first.rows$J_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
217 first.rows.j = names(first.rows.j[which.max(first.rows.j)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
218
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
219 first.sum = data.frame(merge = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
220 Patient = patient,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
221 Receptor = first.rows[1,"Receptor"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
222 Sample = first.rows[1,"Sample"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
223 Cell_Count = first.rows[1,"Cell_Count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
224 Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
225 Log10_Frequency = log10(sum(first.rows$Frequency)),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
226 Total_Read_Count = sum(first.rows$Total_Read_Count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
227 dsPerM = sum(first.rows$dsPerM),
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
228 J_Segment_Major_Gene = first.rows.j,
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
229 V_Segment_Major_Gene = first.rows.v,
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
230 Clone_Sequence = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
231 CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
232 Related_to_leukemia_clone = F,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
233 Frequency = sum(first.rows$Frequency),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
234 locus_V = first.rows[1,"locus_V"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
235 locus_J = first.rows[1,"locus_J"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
236 min_cell_count = first.rows[1,"min_cell_count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
237 normalized_read_count = sum(first.rows$normalized_read_count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
238 paste = first.rows[1,"paste"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
239 min_cell_paste = first.rows[1,"min_cell_paste"])
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
240
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
241 if(nrow(second.rows) > 0){
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
242 second.rows.v = table(second.rows$V_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
243 second.rows.v = names(second.rows.v[which.max(second.rows.v)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
244 second.rows.j = table(second.rows$J_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
245 second.rows.j = names(second.rows.j[which.max(second.rows.j)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
246
55
43ce3ebfc9b5 Uploaded
davidvanzessen
parents: 54
diff changeset
247 second.sum = data.frame(merge = first.clone.sequence,
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
248 Patient = patient,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
249 Receptor = second.rows[1,"Receptor"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
250 Sample = second.rows[1,"Sample"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
251 Cell_Count = second.rows[1,"Cell_Count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
252 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
253 Log10_Frequency = log10(sum(second.rows$Frequency)),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
254 Total_Read_Count = sum(second.rows$Total_Read_Count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
255 dsPerM = sum(second.rows$dsPerM),
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
256 J_Segment_Major_Gene = second.rows.j,
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
257 V_Segment_Major_Gene = second.rows.v,
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
258 Clone_Sequence = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
259 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
260 Related_to_leukemia_clone = F,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
261 Frequency = sum(second.rows$Frequency),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
262 locus_V = second.rows[1,"locus_V"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
263 locus_J = second.rows[1,"locus_J"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
264 min_cell_count = second.rows[1,"min_cell_count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
265 normalized_read_count = sum(second.rows$normalized_read_count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
266 paste = second.rows[1,"paste"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
267 min_cell_paste = second.rows[1,"min_cell_paste"])
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
268
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
269 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
270 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
271
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
272 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
273 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
274
56
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
275 tmp.rows = rbind(first.rows, second.rows)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
276 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
277
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
278
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
279 #add to the scatterplot data
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
280 scatterplot.row = first.sum[,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
281 scatterplot.row$type = paste(first.sum[,"Sample"], "In Both")
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
282 scatterplot.row$link = link.count
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
283 scatterplot.row$on = onShort
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
284
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
285 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
286
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
287 scatterplot.row = second.sum[,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
288 scatterplot.row$type = paste(second.sum[,"Sample"], "In Both")
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
289 scatterplot.row$link = link.count
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
290 scatterplot.row$on = onShort
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
291
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
292 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
293
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
294 #write some information about the match to a log file
56
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
295 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) {
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
296 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
297 } else {
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
298 second.clone.sequence = second.rows[1,"Clone_Sequence"]
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
299 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
300 cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
301 } else {
58
3ed7878f75c3 Uploaded
davidvanzessen
parents: 56
diff changeset
302 #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
303 }
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
304 }
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
305
59
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
306 } else if(nrow(first.rows) > 1) {
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
307 if(patient1[1,"Sample"] == first.sample){
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
308 patient1 = patient1[!(patient1$Clone_Sequence %in% first.rows$Clone_Sequence),]
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
309 patient1 = rbind(patient1, first.sum)
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
310 } else {
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
311 patient2 = patient2[!(patient2$Clone_Sequence %in% first.rows$Clone_Sequence),]
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
312 patient2 = rbind(patient2, first.sum)
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
313 }
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
314
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
315 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"])
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
316 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
317
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
318 patient.fuzzy = patient.fuzzy[-first.match.filter,]
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
319
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
320 #add to the scatterplot data
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
321 scatterplot.row = first.sum[,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
322 scatterplot.row$type = first.sum[,"Sample"]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
323 scatterplot.row$link = link.count
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
324 scatterplot.row$on = onShort
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
325
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
326 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
59
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
327
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
328 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
329 } else {
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
330 patient.fuzzy = patient.fuzzy[-1,]
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
331
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
332 #add to the scatterplot data
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
333 scatterplot.row = first.sum[,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
334 scatterplot.row$type = first.sum[,"Sample"]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
335 scatterplot.row$link = link.count
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
336 scatterplot.row$on = onShort
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
337
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
338 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
339 }
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
340 link.count = link.count + 1
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
341 }
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
342 patient.merge.list[[patient]] <<- patientMerge
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
343 patient.merge.list.second[[patient]] <<- merge.list[["second"]]
69
c532b3f8dc97 Uploaded
davidvanzessen
parents: 68
diff changeset
344
c532b3f8dc97 Uploaded
davidvanzessen
parents: 68
diff changeset
345 sample.order = data.frame(type = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both")),type.order = 1:4)
c532b3f8dc97 Uploaded
davidvanzessen
parents: 68
diff changeset
346 scatterplot_data = merge(scatterplot_data, sample.order, by="type")
c532b3f8dc97 Uploaded
davidvanzessen
parents: 68
diff changeset
347
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
348 scatter_locus_data_list[[patient]] <<- scatterplot_data
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
349 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
350 }
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
351
52
c5c2a790d476 Uploaded
davidvanzessen
parents: 51
diff changeset
352 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
c5c2a790d476 Uploaded
davidvanzessen
parents: 51
diff changeset
353 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
354
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
355
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
356 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
357 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony])
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
358 res1 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
359 res2 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
360 resBoth = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
361 read1Count = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
362 read2Count = vector()
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
363 locussum1 = vector()
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
364 locussum2 = vector()
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
365
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
366 #for(iter in 1){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
367 for(iter in 1:length(product[,1])){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
368 threshhold = product[iter,threshholdIndex]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
369 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
370 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
371 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
372 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
373 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
374 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
14
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
375 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
376 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
377 res1 = append(res1, sum(one))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
378 res2 = append(res2, sum(two))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
379 resBoth = append(resBoth, sum(both))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
380 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
381 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
382 #threshhold = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
383 if(threshhold != 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
384 if(sum(one) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
385 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
386 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
387 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
388 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
389 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
390 if(sum(two) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
391 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
392 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
393 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
394 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
395 }
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
396 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
397 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
398 if(nrow(scatterplot_locus_data) > 0){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
399 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
400 }
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
401
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
402
69
c532b3f8dc97 Uploaded
davidvanzessen
parents: 68
diff changeset
403
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
404 p = NULL
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
405 print(paste("nrow scatterplot_locus_data", nrow(scatterplot_locus_data)))
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
406 if(nrow(scatterplot_locus_data) != 0){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
407 if(on == "normalized_read_count"){
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
408 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
69
c532b3f8dc97 Uploaded
davidvanzessen
parents: 68
diff changeset
409 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), normalized_read_count, group=link)) + geom_line() + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,10^6)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
410 } else {
69
c532b3f8dc97 Uploaded
davidvanzessen
parents: 68
diff changeset
411 p = ggplot(scatterplot_locus_data, aes(factor(reorder(type, type.order)), Frequency, group=link)) + geom_line() + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
412 }
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
413 p = p + geom_point(aes(colour=type), position="dodge")
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
414 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
415 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
416 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
417 }
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
418 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
419 print(p)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
420 dev.off()
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
421 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
422 if(sum(both) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
423 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
424 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
425 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
426 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
427 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
428 }
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
429 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
430 if(sum(is.na(patientResult$percentage)) > 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
431 patientResult[is.na(patientResult$percentage),]$percentage = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
432 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
433 colnames(patientResult)[6] = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
434 colnames(patientResult)[8] = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
435 colnamesBak = colnames(patientResult)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
436 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
437 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
438 colnames(patientResult) = colnamesBak
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
439
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
440 patientResult$Locus = factor(patientResult$Locus, Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
441 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
442
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
443 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
444 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
445 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
446 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
447 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
448 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
449 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
450 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
451 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
452 #(t,r,b,l)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
453 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
454 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
455 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
456 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
457 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
458 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
459 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
460 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
461 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
462
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
463 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
464 patientResult$relativeValue = patientResult$value * 10
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
465 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
466 plt = ggplot(patientResult)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
467 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
468 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
469 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
470 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
471 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
472 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
473 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
474 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
475 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
476 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
477
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
478 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
479
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
480 interval = intervalFreq
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
481 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
482 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
483 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
484
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
485 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
486
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
487 interval = intervalReads
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
488 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
489 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
490 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
491
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
492 if(nrow(single_patients) > 0){
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
493 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
494 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000))
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
495 p = p + geom_point(aes(colour=type), position="jitter")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
496 p = p + xlab("In one or both samples") + ylab("Reads")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
497 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
498 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
499 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
500 dev.off()
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
501
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
502 #p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
503 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
504 p = p + geom_point(aes(colour=type), position="jitter")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
505 p = p + xlab("In one or both samples") + ylab("Frequency")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
506 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
507 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
508 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
509 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
510 } else {
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
511 empty <- data.frame()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
512 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
513
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
514 png("singles_reads_scatterplot.png", width=400, height=300)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
515 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
516 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
517
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
518 png("singles_freq_scatterplot.png", width=400, height=300)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
519 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
520 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
521 }
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
522
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
523 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
524 patient.merge.list.second = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
525
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
526 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
527 onShort = "reads"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
528 if(on == "Frequency"){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
529 onShort = "freq"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
530 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
531 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
532 ony = paste(on, ".y", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
533 onz = paste(on, ".z", sep="")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
534 type="triplet"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
535
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
536 threshholdIndex = which(colnames(product) == "interval")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
537 V_SegmentIndex = which(colnames(product) == "V_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
538 J_SegmentIndex = which(colnames(product) == "J_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
539 titleIndex = which(colnames(product) == "Titles")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
540 sampleIndex = which(colnames(patient1) == "Sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
541 patientIndex = which(colnames(patient1) == "Patient")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
542 oneSample = paste(patient1[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
543 twoSample = paste(patient2[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
544 threeSample = paste(patient3[1,sampleIndex], sep="")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
545
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
546 if(mergeOn == "Clone_Sequence"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
547 patient1$merge = paste(patient1$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
548 patient2$merge = paste(patient2$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
549 patient3$merge = paste(patient3$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
550
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
551 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
552 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
553 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
554 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
555 }
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
556
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
557 #patientMerge = merge(patient1, patient2, by="merge")[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
558 patient1.fuzzy = patient1
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
559 patient2.fuzzy = patient2
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
560 patient3.fuzzy = patient3
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
561
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
562 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
563
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
564 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
565 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
566 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
567
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
568 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy)
65
7a63e90cfc3d Uploaded
davidvanzessen
parents: 64
diff changeset
569 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
570
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
571 other.sample.list = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
572 other.sample.list[[oneSample]] = c(twoSample, threeSample)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
573 other.sample.list[[twoSample]] = c(oneSample, threeSample)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
574 other.sample.list[[threeSample]] = c(oneSample, twoSample)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
575
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
576 patientMerge = merge(patient1, patient2, by="merge")
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
577 patientMerge = merge(patientMerge, patient3, by="merge")
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
578 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
579 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
580 patientMerge = patientMerge[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
581
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
582 duo.merge.list = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
583
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
584 patientMerge12 = merge(patient1, patient2, by="merge")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
585 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
586 patientMerge12 = patientMerge12[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
587 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
588 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
589
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
590 patientMerge13 = merge(patient1, patient3, by="merge")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
591 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
592 patientMerge13 = patientMerge13[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
593 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
594 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
595
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
596 patientMerge23 = merge(patient2, patient3, by="merge")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
597 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
598 patientMerge23 = patientMerge23[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
599 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
600 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
601
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
602 merge.list = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
603 merge.list[["second"]] = vector()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
604
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
605 start.time = proc.time()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
606 if(paste(label1, "123") %in% names(patient.merge.list)){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
607 patientMerge = patient.merge.list[[paste(label1, "123")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
608 patientMerge12 = patient.merge.list[[paste(label1, "12")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
609 patientMerge13 = patient.merge.list[[paste(label1, "13")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
610 patientMerge23 = patient.merge.list[[paste(label1, "23")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
611
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
612 merge.list[["second"]] = patient.merge.list.second[[label1]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
613
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
614 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
615 } else {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
616 while(nrow(patient.fuzzy) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
617 first.merge = patient.fuzzy[1,"merge"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
618 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
619 first.sample = patient.fuzzy[1,"Sample"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
620
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
621 merge.filter = first.merge == patient.fuzzy$merge
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
622
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
623 second.sample = other.sample.list[[first.sample]][1]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
624 third.sample = other.sample.list[[first.sample]][2]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
625
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
626 sample.filter.1 = first.sample == patient.fuzzy$Sample
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
627 sample.filter.2 = second.sample == patient.fuzzy$Sample
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
628 sample.filter.3 = third.sample == patient.fuzzy$Sample
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
629
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
630 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
631
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
632 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
633 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
634 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
635
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
636 matches.in.1 = any(match.filter.1)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
637 matches.in.2 = any(match.filter.2)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
638 matches.in.3 = any(match.filter.3)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
639
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
640
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
641
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
642 rows.1 = patient.fuzzy[match.filter.1,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
643
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
644 sum.1 = data.frame(merge = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
645 Patient = label1,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
646 Receptor = rows.1[1,"Receptor"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
647 Sample = rows.1[1,"Sample"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
648 Cell_Count = rows.1[1,"Cell_Count"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
649 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
650 Log10_Frequency = log10(sum(rows.1$Frequency)),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
651 Total_Read_Count = sum(rows.1$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
652 dsPerM = sum(rows.1$dsPerM),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
653 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
654 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
655 Clone_Sequence = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
656 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
657 Related_to_leukemia_clone = F,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
658 Frequency = sum(rows.1$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
659 locus_V = rows.1[1,"locus_V"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
660 locus_J = rows.1[1,"locus_J"],
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
661 uniqueID = rows.1[1,"uniqueID"],
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
662 normalized_read_count = sum(rows.1$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
663 sum.2 = sum.1[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
664 rows.2 = patient.fuzzy[match.filter.2,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
665 if(matches.in.2){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
666 sum.2 = data.frame(merge = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
667 Patient = label1,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
668 Receptor = rows.2[1,"Receptor"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
669 Sample = rows.2[1,"Sample"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
670 Cell_Count = rows.2[1,"Cell_Count"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
671 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
672 Log10_Frequency = log10(sum(rows.2$Frequency)),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
673 Total_Read_Count = sum(rows.2$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
674 dsPerM = sum(rows.2$dsPerM),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
675 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
676 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
677 Clone_Sequence = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
678 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
679 Related_to_leukemia_clone = F,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
680 Frequency = sum(rows.2$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
681 locus_V = rows.2[1,"locus_V"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
682 locus_J = rows.2[1,"locus_J"],
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
683 uniqueID = rows.2[1,"uniqueID"],
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
684 normalized_read_count = sum(rows.2$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
685 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
686
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
687 sum.3 = sum.1[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
688 rows.3 = patient.fuzzy[match.filter.3,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
689 if(matches.in.3){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
690 sum.3 = data.frame(merge = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
691 Patient = label1,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
692 Receptor = rows.3[1,"Receptor"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
693 Sample = rows.3[1,"Sample"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
694 Cell_Count = rows.3[1,"Cell_Count"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
695 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
696 Log10_Frequency = log10(sum(rows.3$Frequency)),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
697 Total_Read_Count = sum(rows.3$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
698 dsPerM = sum(rows.3$dsPerM),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
699 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
700 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
701 Clone_Sequence = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
702 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
703 Related_to_leukemia_clone = F,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
704 Frequency = sum(rows.3$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
705 locus_V = rows.3[1,"locus_V"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
706 locus_J = rows.3[1,"locus_J"],
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
707 uniqueID = rows.3[1,"uniqueID"],
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
708 normalized_read_count = sum(rows.3$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
709 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
710
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
711 if(matches.in.2 & matches.in.3){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
712 merge.123 = merge(sum.1, sum.2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
713 merge.123 = merge(merge.123, sum.3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
714 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
715 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
716
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
717 patientMerge = rbind(patientMerge, merge.123)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
718 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
719
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
720 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
721 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
722
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
723 } else if (matches.in.2) {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
724 #other.sample1 = other.sample.list[[first.sample]][1]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
725 #other.sample2 = other.sample.list[[first.sample]][2]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
726
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
727 second.sample = sum.2[,"Sample"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
728
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
729 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
730
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
731 merge.12 = merge(sum.1, sum.2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
732
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
733 current.merge.list = rbind(current.merge.list, merge.12)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
734 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
735
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
736 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
737
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
738 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
739 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
740
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
741 } else if (matches.in.3) {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
742
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
743 #other.sample1 = other.sample.list[[first.sample]][1]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
744 #other.sample2 = other.sample.list[[first.sample]][2]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
745
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
746 second.sample = sum.3[,"Sample"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
747
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
748 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
749
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
750 merge.13 = merge(sum.1, sum.3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
751
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
752 current.merge.list = rbind(current.merge.list, merge.13)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
753 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
754
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
755 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
756
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
757 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
758 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
759
63
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
760 } else if(nrow(rows.1) > 1){
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
761 patient1 = patient1[!(patient1$Clone_Sequence %in% rows.1$Clone_Sequence),]
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
762 print(names(patient1)[names(patient1) %in% sum.1])
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
763 print(names(patient1)[!(names(patient1) %in% sum.1)])
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
764 print(names(patient1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
765 print(names(sum.1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
766 print(summary(sum.1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
767 print(summary(patient1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
768 print(dim(sum.1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
769 print(dim(patient1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
770 print(head(sum.1[,names(patient1)]))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
771 patient1 = rbind(patient1, sum.1[,names(patient1)])
63
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
772 patient.fuzzy = patient.fuzzy[-match.filter.1,]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
773 } else {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
774 patient.fuzzy = patient.fuzzy[-1,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
775 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
776
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
777 tmp.rows = rbind(rows.1, rows.2, rows.3)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
778 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
779
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
780 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
781 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
782 } else {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
783 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
784
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
785 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
786 patient.merge.list[[paste(label1, "123")]] = patientMerge
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
787
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
788 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
789 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
790 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
791
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
792 patient.merge.list[[paste(label1, "12")]] = patientMerge12
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
793 patient.merge.list[[paste(label1, "13")]] = patientMerge13
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
794 patient.merge.list[[paste(label1, "23")]] = patientMerge23
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
795
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
796 patient.merge.list.second[[label1]] = merge.list[["second"]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
797 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
798 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
799 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
800 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
801 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
802 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
803
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
804 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
805 #patientMerge12$thresholdValue = pmin(patientMerge12[,onx], patientMerge12[,ony])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
806 #patientMerge13$thresholdValue = pmin(patientMerge13[,onx], patientMerge13[,ony])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
807 #patientMerge23$thresholdValue = pmin(patientMerge23[,onx], patientMerge23[,ony])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
808
63
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
809 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),]
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
810 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),]
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
811 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),]
62
b1ad6f515338 Uploaded
davidvanzessen
parents: 61
diff changeset
812
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
813 if(F){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
814 patientMerge = merge(patient1, patient2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
815 patientMerge = merge(patientMerge, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
816 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
817 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
818 patientMerge12 = merge(patient1, patient2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
819 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
820 patientMerge13 = merge(patient1, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
821 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
822 patientMerge23 = merge(patient2, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
823 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
824 }
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
825
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
826 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
827 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
828 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
829 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
830
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
831 res1 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
832 res2 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
833 res3 = vector()
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
834 res12 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
835 res13 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
836 res23 = vector()
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
837 resAll = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
838 read1Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
839 read2Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
840 read3Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
841
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
842 if(appendTriplets){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
843 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
844 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
845 for(iter in 1:length(product[,1])){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
846 threshhold = product[iter,threshholdIndex]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
847 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
848 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
849 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
850 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
851
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
852 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
853 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
854 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
855
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
856 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
857 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
858 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
859
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
860 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
861 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
862 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
863 res1 = append(res1, sum(one))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
864 res2 = append(res2, sum(two))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
865 res3 = append(res3, sum(three))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
866 resAll = append(resAll, sum(all))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
867 res12 = append(res12, sum(one_two))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
868 res13 = append(res13, sum(one_three))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
869 res23 = append(res23, sum(two_three))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
870 #threshhold = 0
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
871 if(threshhold != 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
872 if(sum(one) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
873 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
874 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
875 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
876 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
877 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
878 if(sum(two) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
879 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
880 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
881 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
882 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
883 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
884 if(sum(three) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
885 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
886 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
887 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
888 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
889 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
890 if(sum(one_two) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
891 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
892 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
893 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
894 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
895 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
896 if(sum(one_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
897 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
898 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
899 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
900 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
901 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
902 if(sum(two_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
903 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
904 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
905 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
906 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
907 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
908 } else { #scatterplot data
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
909 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
910 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
911 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
912 if(sum(in_two) > 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
913 scatterplot_locus_data[in_two,]$type = "In two"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
914 }
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
915 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
916 if(sum(in_three)> 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
917 scatterplot_locus_data[in_three,]$type = "In three"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
918 }
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
919 not_in_one = scatterplot_locus_data$type != "In one"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
920 if(sum(not_in_one) > 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
921 scatterplot_locus_data[not_in_one,]$type = "In multiple"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
922 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
923 p = NULL
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
924 if(nrow(scatterplot_locus_data) != 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
925 if(on == "normalized_read_count"){
31
ce8bd23d0335 Uploaded
davidvanzessen
parents: 30
diff changeset
926 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
32
dde5ec847549 Uploaded
davidvanzessen
parents: 31
diff changeset
927 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
928 } else {
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
929 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
930 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
931 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
932 p = p + geom_point(aes(colour=type), position="jitter")
25
99020e5ce46c Uploaded
davidvanzessen
parents: 24
diff changeset
933 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
934 } else {
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
935 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
936 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
937 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
938 print(p)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
939 dev.off()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
940 }
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
941 if(sum(all) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
942 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
943 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
944 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
945 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
946 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
947 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
948 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
949 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
950 colnames(patientResult)[6] = oneSample
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
951 colnames(patientResult)[7] = twoSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
952 colnames(patientResult)[8] = threeSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
953 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
954 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
955 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
956
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
957 colnamesBak = colnames(patientResult)
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
958 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
959 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
960 colnames(patientResult) = colnamesBak
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
961
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
962 patientResult$Locus = factor(patientResult$Locus, Titles)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
963 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
964
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
965 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
966 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
967 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
968 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
969 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
970 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
971 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
972 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
973 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
974
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
975 fontSize = 4
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
976
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
977 bak = patientResult
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
978 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
979 patientResult$relativeValue = patientResult$value * 10
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
980 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
981 plt = ggplot(patientResult)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
982 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
983 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
984 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
985 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
986 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
987 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
988 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
989 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
990 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
991 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
992 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
993
67
40c72b9ffc79 Uploaded
davidvanzessen
parents: 66
diff changeset
994 if(F & nrow(triplets) != 0){
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
995
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
996 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
997
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
998 triplets$uniqueID = "ID"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
999
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1000 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1001 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1002 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1003
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1004 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1005 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1006 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1007
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1008 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1009
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1010 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1011
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1012 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1013 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1014 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1015
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1016 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1017 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1018
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1019 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1020
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1021 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1022
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1023 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1024
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1025 triplets = triplets[triplets$normalized_read_count >= min_cells,]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1026
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1027 column_drops = c("min_cell_count", "min_cell_paste")
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1028
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1029 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1030
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1031 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1032
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1033 interval = intervalReads
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1034 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1035 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1036
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1037 one = triplets[triplets$Sample == "14696_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1038 two = triplets[triplets$Sample == "24536_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1039 three = triplets[triplets$Sample == "24062_reg_BM",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1040 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1041
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1042 one = triplets[triplets$Sample == "16278_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1043 two = triplets[triplets$Sample == "26402_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1044 three = triplets[triplets$Sample == "26759_Left",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1045 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1046
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1047 one = triplets[triplets$Sample == "16278_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1048 two = triplets[triplets$Sample == "26402_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1049 three = triplets[triplets$Sample == "26759_Right",]
53
8ebe57feecd6 Uploaded
davidvanzessen
parents: 52
diff changeset
1050 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1051
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1052 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1053
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1054 interval = intervalFreq
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1055 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1056 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1057
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1058 one = triplets[triplets$Sample == "14696_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1059 two = triplets[triplets$Sample == "24536_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1060 three = triplets[triplets$Sample == "24062_reg_BM",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1061 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1062
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1063 one = triplets[triplets$Sample == "16278_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1064 two = triplets[triplets$Sample == "26402_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1065 three = triplets[triplets$Sample == "26759_Left",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1066 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1067
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1068 one = triplets[triplets$Sample == "16278_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1069 two = triplets[triplets$Sample == "26402_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1070 three = triplets[triplets$Sample == "26759_Right",]
53
8ebe57feecd6 Uploaded
davidvanzessen
parents: 52
diff changeset
1071 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1072 } else {
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1073 cat("", file="triplets.txt")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1074 }
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
1075 cat("</table></html>", file=logfile, append=T)