annotate RScript.r @ 50:7dd7cefcf72d draft

Uploaded
author davidvanzessen
date Thu, 08 Oct 2015 10:07:28 -0400
parents 7658e9f3d416
children 17e677c72e49
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
3 inFile = args[1]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
4 outDir = args[2]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
5 logfile = args[3]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
6 min_freq = as.numeric(args[4])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
7 min_cells = as.numeric(args[5])
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
8 mergeOn = args[6]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
9
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
10 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
11
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
12 library(ggplot2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
13 library(reshape2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
14 library(data.table)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
15 library(grid)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
16 library(parallel)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
17 #require(xtable)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
18 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
19 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
20 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Related_to_leukemia_clone", "Clone_Sequence")]
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
21 dat$dsPerM = 0
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
22 dat = dat[!is.na(dat$Patient),]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
23 dat$Related_to_leukemia_clone = F
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
24
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
25 setwd(outDir)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
26 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
27 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
28 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
29
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
30 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
31
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
32 dat$Frequency = ((10^dat$Log10_Frequency)*100)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
33
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
34 dat = dat[dat$Frequency >= min_freq,]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
35
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
36 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
37
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
38 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
39
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
40 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
41 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
42 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
43
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
44 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
45 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
46
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
47 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
48 print(paste("rows:", nrow(dat)))
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
49 dat = merge(dat, min_cell_count, by="min_cell_paste")
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
50 print(paste("rows:", nrow(dat)))
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
51 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
52
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
53 dat = dat[dat$normalized_read_count >= min_cells,]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
54
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
55 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
56
22
b662fdc7eff4 Uploaded
davidvanzessen
parents: 20
diff changeset
57 patients = split(dat, dat$Patient, drop=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
58 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
6
8313c6cc65c5 Uploaded
davidvanzessen
parents: 5
diff changeset
59 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
60 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
61 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
62 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
63 Titles = factor(Titles, levels=Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
64 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
65
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
66 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
67
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
68 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
69 if (!is.data.frame(x) & is.list(x)){
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
70 x = x[[1]]
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
71 }
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
72 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
73 x = data.frame(x,stringsAsFactors=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
74 onShort = "reads"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
75 if(on == "Frequency"){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
76 onShort = "freq"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
77 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
78 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
79 ony = paste(on, ".y", sep="")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
80 splt = split(x, x$Sample, drop=T)
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
81 type="pair"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
82 if(length(splt) == 1){
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
83 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
84 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
85 type="single"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
86 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
87 patient1 = splt[[1]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
88 patient2 = splt[[2]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
89
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
90 threshholdIndex = which(colnames(product) == "interval")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
91 V_SegmentIndex = which(colnames(product) == "V_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
92 J_SegmentIndex = which(colnames(product) == "J_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
93 titleIndex = which(colnames(product) == "Titles")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
94 sampleIndex = which(colnames(x) == "Sample")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
95 patientIndex = which(colnames(x) == "Patient")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
96 oneSample = paste(patient1[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
97 twoSample = paste(patient2[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
98 patient = paste(x[1,patientIndex])
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
99
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
100 switched = F
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
101 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
102 tmp = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
103 twoSample = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
104 oneSample = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
105 tmp = patient1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
106 patient1 = patient2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
107 patient2 = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
108 switched = T
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
109 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
110 if(appendtxt){
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
111 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
112 }
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
113 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
114
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
115 if(mergeOn == "Clone_Sequence"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
116 patient1$merge = paste(patient1$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
117 patient2$merge = paste(patient2$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
118 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
119 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
120 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
121 }
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
122
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
123 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
124 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
125 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
126 scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
127 scatterplot_data$on = onShort
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
128
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
129 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy'
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
130 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
131
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
132 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
133
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
134
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
135 #fuzzy matching here...
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
136 if(mergeOn == "Clone_Sequence"){
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
137 #merge.list = patientMerge$merge
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
138
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
139 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
140 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
141
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
142 patient1.fuzzy = patient1
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
143 patient2.fuzzy = patient2
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
144
36
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
145 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
146 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
147
48
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
148 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
149 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
150
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
151 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
152 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
153
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
154 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove?
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
155 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
156
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
157 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
158 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
159
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
160 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
161 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
162
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
163 merge.list = list()
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
164
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
165 merge.list[["second"]] = vector()
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
166
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
167
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
168 while(nrow(patient.fuzzy) > 1){
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
169 first.merge = patient.fuzzy[1,"merge"]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
170 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
171 first.sample = patient.fuzzy[1,"Sample"]
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
172 merge.filter = first.merge == patient.fuzzy$merge
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
173
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
174 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
42
7d4eda663c29 Uploaded
davidvanzessen
parents: 41
diff changeset
175
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
176 first.sample.filter = first.sample == patient.fuzzy$Sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
177 second.sample.filter = first.sample != patient.fuzzy$Sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
178
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
179 #first match same sample, sum to a single row, same for other sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
180 #then merge rows like 'normal'
43
2eef2c438500 Uploaded
davidvanzessen
parents: 42
diff changeset
181
48
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
182 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
183
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
184
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
185
44
07278582b735 Uploaded
davidvanzessen
parents: 43
diff changeset
186 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
187 first.match.filter = merge.filter & sequence.filter & first.sample.filter
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
188 second.match.filter = merge.filter & sequence.filter & second.sample.filter
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
189
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
190 first.rows = patient.fuzzy[first.match.filter,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
191 second.rows = patient.fuzzy[second.match.filter,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
192
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
193 first.rows.v = table(first.rows$V_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
194 first.rows.v = names(first.rows.v[which.max(first.rows.v)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
195 first.rows.j = table(first.rows$J_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
196 first.rows.j = names(first.rows.j[which.max(first.rows.j)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
197
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
198 first.sum = data.frame(merge = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
199 Patient = patient,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
200 Receptor = first.rows[1,"Receptor"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
201 Sample = first.rows[1,"Sample"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
202 Cell_Count = first.rows[1,"Cell_Count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
203 Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
204 Log10_Frequency = log10(sum(first.rows$Frequency)),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
205 Total_Read_Count = sum(first.rows$Total_Read_Count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
206 dsPerM = sum(first.rows$dsPerM),
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
207 J_Segment_Major_Gene = first.rows.j,
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
208 V_Segment_Major_Gene = first.rows.v,
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
209 Clone_Sequence = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
210 CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
211 Related_to_leukemia_clone = F,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
212 Frequency = sum(first.rows$Frequency),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
213 locus_V = first.rows[1,"locus_V"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
214 locus_J = first.rows[1,"locus_J"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
215 min_cell_count = first.rows[1,"min_cell_count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
216 normalized_read_count = sum(first.rows$normalized_read_count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
217 paste = first.rows[1,"paste"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
218 min_cell_paste = first.rows[1,"min_cell_paste"])
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
219
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
220 if(nrow(second.rows) > 0){
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
221 second.rows.v = table(second.rows$V_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
222 second.rows.v = names(second.rows.v[which.max(second.rows.v)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
223 second.rows.j = table(second.rows$J_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
224 second.rows.j = names(second.rows.j[which.max(second.rows.j)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
225
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
226 second.sum = data.frame(merge = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
227 Patient = patient,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
228 Receptor = second.rows[1,"Receptor"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
229 Sample = second.rows[1,"Sample"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
230 Cell_Count = second.rows[1,"Cell_Count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
231 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
232 Log10_Frequency = log10(sum(second.rows$Frequency)),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
233 Total_Read_Count = sum(second.rows$Total_Read_Count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
234 dsPerM = sum(second.rows$dsPerM),
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
235 J_Segment_Major_Gene = second.rows.j,
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
236 V_Segment_Major_Gene = second.rows.v,
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
237 Clone_Sequence = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
238 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
239 Related_to_leukemia_clone = F,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
240 Frequency = sum(second.rows$Frequency),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
241 locus_V = second.rows[1,"locus_V"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
242 locus_J = second.rows[1,"locus_J"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
243 min_cell_count = second.rows[1,"min_cell_count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
244 normalized_read_count = sum(second.rows$normalized_read_count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
245 paste = second.rows[1,"paste"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
246 min_cell_paste = second.rows[1,"min_cell_paste"])
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
247
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
248 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
249 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
250
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
251 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
252 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
253
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
254 if(sum(first.match.filter) == 1 & sum(second.match.filter) == 1){
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
255 second.clone.sequence = patient.fuzzy[second.match.filter, "Clone_Sequence"]
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
256 if(first.clone.sequence != second.clone.sequence){
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
257 #merge.list[["second"]] = append(merge.list[["second"]], second.clone.sequence)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
258 }
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
259 }
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
260
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
261 if(nrow(first.rows) > 1 | nrow(second.rows) > 1){
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
262
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
263 }
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
264
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
265 } else {
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
266 patient.fuzzy = patient.fuzzy[-1,]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
267 }
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
268 }
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
269
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
270 }
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
271
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
272
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
273 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
274 res1 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
275 res2 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
276 resBoth = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
277 read1Count = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
278 read2Count = vector()
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
279 locussum1 = vector()
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
280 locussum2 = vector()
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
281
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
282 #for(iter in 1){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
283 for(iter in 1:length(product[,1])){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
284 threshhold = product[iter,threshholdIndex]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
285 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
286 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
287 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
288 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
289 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
290 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
14
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
291 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
292 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
293 res1 = append(res1, sum(one))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
294 res2 = append(res2, sum(two))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
295 resBoth = append(resBoth, sum(both))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
296 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
297 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
298 #threshhold = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
299 if(threshhold != 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
300 if(sum(one) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
301 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
302 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
303 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
304 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
305 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
306 if(sum(two) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
307 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
308 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
309 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
310 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
311 }
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
312 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
313 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
314 #scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[[twoSample]]),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
315 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
316 if(nrow(scatterplot_locus_data) > 0){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
317 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
318 }
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
319 in_one = (scatterplot_locus_data$merge %in% patient1$merge)
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
320 in_two = (scatterplot_locus_data$merge %in% patient2$merge)
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
321 if(any(in_two)){
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
322 scatterplot_locus_data[in_two,]$type = twoSample
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
323 }
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
324 in_both = (scatterplot_locus_data$merge %in% patientMerge$merge)
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
325 #merge.list.filter = (scatterplot_locus_data$merge %in% merge.list[[oneSample]])
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
326 #exact.matches.filter = (scatterplot_locus_data$merge %in% cs.exact.matches)
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
327 if(any(in_both)){
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
328 scatterplot_locus_data[in_both,]$type = "In Both"
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
329 }
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
330 if(type == "single"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
331 single_patients <<- rbind(single_patients, scatterplot_locus_data)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
332 }
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
333 p = NULL
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
334 if(nrow(scatterplot_locus_data) != 0){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
335 if(on == "normalized_read_count"){
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
336 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
337 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=10^6) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
338 } else {
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
339 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
340 }
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
341 p = p + geom_point(aes(colour=type), position="jitter")
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
342 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
343 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
344 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
345 }
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
346 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
347 print(p)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
348 dev.off()
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
349 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
350 if(sum(both) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
351 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
352 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
353 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
354 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
355 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
356 }
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
357 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
358 if(sum(is.na(patientResult$percentage)) > 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
359 patientResult[is.na(patientResult$percentage),]$percentage = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
360 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
361 colnames(patientResult)[6] = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
362 colnames(patientResult)[8] = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
363 colnamesBak = colnames(patientResult)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
364 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
365 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
366 colnames(patientResult) = colnamesBak
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
367
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
368 patientResult$Locus = factor(patientResult$Locus, Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
369 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
370
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
371 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
372 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
373 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
374 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
375 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
376 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
377 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
378 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
379 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
380 #(t,r,b,l)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
381 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
382 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
383 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
384 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
385 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
386 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
387 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
388 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
389 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
390
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
391 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
392 patientResult$relativeValue = patientResult$value * 10
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
393 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
394 plt = ggplot(patientResult)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
395 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
396 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
397 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
398 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
399 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
400 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
401 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
402 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
403 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
404 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
405
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
406 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
407
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
408 interval = intervalFreq
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
409 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
410 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
411 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
412
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
413 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
414
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
415 interval = intervalReads
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
416 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
417 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
418 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
419
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
420 cat("</table></html>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
421
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
422
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
423 if(nrow(single_patients) > 0){
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
424 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
425 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
426 p = p + geom_point(aes(colour=type), position="jitter")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
427 p = p + xlab("In one or both samples") + ylab("Reads")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
428 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
429 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
430 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
431 dev.off()
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
432
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
433 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
434 p = p + geom_point(aes(colour=type), position="jitter")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
435 p = p + xlab("In one or both samples") + ylab("Frequency")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
436 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
437 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
438 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
439 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
440 } else {
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
441 empty <- data.frame()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
442 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
443
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
444 png("singles_reads_scatterplot.png", width=400, height=300)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
445 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
446 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
447
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
448 png("singles_freq_scatterplot.png", width=400, height=300)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
449 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
450 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
451 }
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
452 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
453 onShort = "reads"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
454 if(on == "Frequency"){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
455 onShort = "freq"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
456 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
457 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
458 ony = paste(on, ".y", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
459 onz = paste(on, ".z", sep="")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
460 type="triplet"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
461
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
462 threshholdIndex = which(colnames(product) == "interval")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
463 V_SegmentIndex = which(colnames(product) == "V_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
464 J_SegmentIndex = which(colnames(product) == "J_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
465 titleIndex = which(colnames(product) == "Titles")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
466 sampleIndex = which(colnames(patient1) == "Sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
467 patientIndex = which(colnames(patient1) == "Patient")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
468 oneSample = paste(patient1[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
469 twoSample = paste(patient2[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
470 threeSample = paste(patient3[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
471
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
472 if(mergeOn == "Clone_Sequence"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
473 patient1$merge = paste(patient1$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
474 patient2$merge = paste(patient2$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
475 patient3$merge = paste(patient3$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
476
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
477 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
478 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
479 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
480 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
481 }
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
482
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
483 patientMerge = merge(patient1, patient2, by="merge")
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
484 patientMerge = merge(patientMerge, patient3, by="merge")
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
485 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
486 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
487 patientMerge12 = merge(patient1, patient2, by="merge")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
488 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
489 patientMerge13 = merge(patient1, patient3, by="merge")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
490 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
491 patientMerge23 = merge(patient2, patient3, by="merge")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
492 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
493
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
494
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
495 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
496 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
497 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
498 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
499
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
500 res1 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
501 res2 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
502 res3 = vector()
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
503 res12 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
504 res13 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
505 res23 = vector()
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
506 resAll = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
507 read1Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
508 read2Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
509 read3Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
510
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
511 if(appendTriplets){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
512 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
513 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
514 for(iter in 1:length(product[,1])){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
515 threshhold = product[iter,threshholdIndex]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
516 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
517 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
518 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
519 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
520
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
521 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
522 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
523 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
524
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
525 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
526 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
527 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
528
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
529 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
530 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
531 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
532 res1 = append(res1, sum(one))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
533 res2 = append(res2, sum(two))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
534 res3 = append(res3, sum(three))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
535 resAll = append(resAll, sum(all))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
536 res12 = append(res12, sum(one_two))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
537 res13 = append(res13, sum(one_three))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
538 res23 = append(res23, sum(two_three))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
539 #threshhold = 0
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
540 if(threshhold != 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
541 if(sum(one) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
542 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
543 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
544 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
545 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
546 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
547 if(sum(two) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
548 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
549 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
550 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
551 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
552 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
553 if(sum(three) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
554 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
555 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
556 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
557 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
558 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
559 if(sum(one_two) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
560 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
561 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
562 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
563 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
564 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
565 if(sum(one_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
566 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
567 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
568 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
569 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
570 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
571 if(sum(two_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
572 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
573 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
574 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
575 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
576 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
577 } else { #scatterplot data
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
578 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
579 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
580 if(sum(in_two) > 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
581 scatterplot_locus_data[in_two,]$type = "In two"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
582 }
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
583 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
584 if(sum(in_three)> 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
585 scatterplot_locus_data[in_three,]$type = "In three"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
586 }
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
587 not_in_one = scatterplot_locus_data$type != "In one"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
588 if(sum(not_in_one) > 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
589 scatterplot_locus_data[not_in_one,]$type = "In multiple"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
590 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
591 p = NULL
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
592 if(nrow(scatterplot_locus_data) != 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
593 if(on == "normalized_read_count"){
31
ce8bd23d0335 Uploaded
davidvanzessen
parents: 30
diff changeset
594 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
32
dde5ec847549 Uploaded
davidvanzessen
parents: 31
diff changeset
595 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
596 } else {
32
dde5ec847549 Uploaded
davidvanzessen
parents: 31
diff changeset
597 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
598 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
599 p = p + geom_point(aes(colour=type), position="jitter")
25
99020e5ce46c Uploaded
davidvanzessen
parents: 24
diff changeset
600 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
601 } else {
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
602 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
603 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
604 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
605 print(p)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
606 dev.off()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
607 }
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
608 if(sum(all) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
609 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
610 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
611 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
612 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
613 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
614 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
615 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
616 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
617 colnames(patientResult)[6] = oneSample
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
618 colnames(patientResult)[7] = twoSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
619 colnames(patientResult)[8] = threeSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
620 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
621 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
622 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
623
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
624 colnamesBak = colnames(patientResult)
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
625 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
626 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
627 colnames(patientResult) = colnamesBak
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
628
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
629 patientResult$Locus = factor(patientResult$Locus, Titles)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
630 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
631
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
632 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
633 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
634 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
635 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
636 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
637 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
638 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
639 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
640 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
641
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
642 fontSize = 4
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
643
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
644 bak = patientResult
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
645 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
646 patientResult$relativeValue = patientResult$value * 10
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
647 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
648 plt = ggplot(patientResult)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
649 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
650 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
651 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
652 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
653 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
654 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
655 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
656 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
657 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
658 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
659 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
660
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
661 if(nrow(triplets) != 0){
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
662 triplets$uniqueID = "ID"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
663
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
664 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
665 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
666 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
667
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
668 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
669 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
670 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
671
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
672 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
673
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
674 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
675 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
676 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
677
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
678 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
679 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
680
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
681 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
682
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
683 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
684
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
685 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
686
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
687 triplets = triplets[triplets$normalized_read_count >= min_cells,]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
688
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
689 column_drops = c("locus_V", "locus_J", "min_cell_count", "min_cell_paste")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
690
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
691 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
692
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
693 #remove duplicate V+J+CDR3, add together numerical values
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
694 triplets = data.frame(data.table(triplets)[, list(Receptor=unique(.SD$Receptor),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
695 Cell_Count=unique(.SD$Cell_Count),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
696 Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
697 Total_Read_Count=sum(.SD$Total_Read_Count),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
698 dsPerM=ifelse("dsPerM" %in% names(dat), sum(.SD$dsPerM), 0),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
699 Related_to_leukemia_clone=all(.SD$Related_to_leukemia_clone),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
700 Frequency=sum(.SD$Frequency),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
701 normalized_read_count=sum(.SD$normalized_read_count),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
702 Log10_Frequency=sum(.SD$Log10_Frequency),
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
703 Clone_Sequence=.SD$Clone_Sequence[1]), by=c("Patient", "Sample", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "CDR3_Sense_Sequence")])
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
704
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
705
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
706 interval = intervalReads
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
707 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
708 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
709
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
710 one = triplets[triplets$Sample == "14696_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
711 two = triplets[triplets$Sample == "24536_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
712 three = triplets[triplets$Sample == "24062_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
713 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="normalized_read_count", T)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
714
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
715 one = triplets[triplets$Sample == "16278_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
716 two = triplets[triplets$Sample == "26402_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
717 three = triplets[triplets$Sample == "26759_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
718 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="normalized_read_count", T)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
719
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
720 one = triplets[triplets$Sample == "16278_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
721 two = triplets[triplets$Sample == "26402_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
722 three = triplets[triplets$Sample == "26759_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
723 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="normalized_read_count", T)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
724
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
725
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
726 interval = intervalFreq
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
727 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
728 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
729
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
730 one = triplets[triplets$Sample == "14696_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
731 two = triplets[triplets$Sample == "24536_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
732 three = triplets[triplets$Sample == "24062_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
733 tripletAnalysis(one, "14696_1", two, "14696_2", three, "14696_3", product=product, interval=interval, on="Frequency", F)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
734
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
735 one = triplets[triplets$Sample == "16278_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
736 two = triplets[triplets$Sample == "26402_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
737 three = triplets[triplets$Sample == "26759_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
738 tripletAnalysis(one, "16278_Left", two, "26402_Left", three, "26759_Left", product=product, interval=interval, on="Frequency", F)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
739
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
740 one = triplets[triplets$Sample == "16278_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
741 two = triplets[triplets$Sample == "26402_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
742 three = triplets[triplets$Sample == "26759_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
743 tripletAnalysis(one, "16278_Right", two, "26402_Right", three, "26759_Right", product=product, interval=interval, on="Frequency", F)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
744 } else {
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
745 cat("", file="triplets.txt")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
746 }