annotate RScript.r @ 68:ef13f0a3f4d6 draft

Uploaded
author davidvanzessen
date Tue, 05 Jan 2016 10:49:40 -0500
parents 40c72b9ffc79
children c532b3f8dc97
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
1 args <- commandArgs(trailingOnly = TRUE)
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
2 options(scipen=999)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
3
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
4 inFile = args[1]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
5 outDir = args[2]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
6 logfile = args[3]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
7 min_freq = as.numeric(args[4])
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
8 min_cells = as.numeric(args[5])
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
9 mergeOn = args[6]
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
10
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
11 cat("<html><table><tr><td>Starting analysis</td></tr>", file=logfile, append=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
12
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
13 library(ggplot2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
14 library(reshape2)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
15 library(data.table)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
16 library(grid)
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
17 library(parallel)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
18 #require(xtable)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
19 cat("<tr><td>Reading input</td></tr>", file=logfile, append=T)
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
20 dat = read.table(inFile, header=T, sep="\t", dec=".", fill=T, stringsAsFactors=F)
66
ef3ac4f431bb Uploaded
davidvanzessen
parents: 65
diff changeset
21 dat = dat[,c("Patient", "Receptor", "Sample", "Cell_Count", "Clone_Molecule_Count_From_Spikes", "Log10_Frequency", "Total_Read_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence", "Clone_Sequence")]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
22 dat$dsPerM = 0
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
23 dat = dat[!is.na(dat$Patient),]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
24 dat$Related_to_leukemia_clone = F
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
25
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
26 setwd(outDir)
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
27 cat("<tr><td>Selecting first V/J Genes</td></tr>", file=logfile, append=T)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
28 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
29 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
30
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
31 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
12
eb5b569b44dd Uploaded
davidvanzessen
parents: 11
diff changeset
32
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
33 dat$Frequency = ((10^dat$Log10_Frequency)*100)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
34
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
35 dat = dat[dat$Frequency >= min_freq,]
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
36
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
37 triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
38
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
39 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
40
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
41 dat$locus_V = substring(dat$V_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
42 dat$locus_J = substring(dat$J_Segment_Major_Gene, 0, 4)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
43 min_cell_count = data.frame(data.table(dat)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("Patient", "locus_V", "locus_J")])
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
44
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
45 dat$min_cell_paste = paste(dat$Patient, dat$locus_V, dat$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
46 min_cell_count$min_cell_paste = paste(min_cell_count$Patient, min_cell_count$locus_V, min_cell_count$locus_J)
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
47
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
48 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
49 print(paste("rows:", nrow(dat)))
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
50 dat = merge(dat, min_cell_count, by="min_cell_paste")
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
51 print(paste("rows:", nrow(dat)))
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
52 dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * dat$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
53
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
54 dat = dat[dat$normalized_read_count >= min_cells,]
13
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
55
576de7c96c4f Uploaded
davidvanzessen
parents: 12
diff changeset
56 dat$paste = paste(dat$Sample, dat$Clone_Sequence)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
57
22
b662fdc7eff4 Uploaded
davidvanzessen
parents: 20
diff changeset
58 patients = split(dat, dat$Patient, drop=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
59 intervalReads = rev(c(0,10,25,50,100,250,500,750,1000,10000))
6
8313c6cc65c5 Uploaded
davidvanzessen
parents: 5
diff changeset
60 intervalFreq = rev(c(0,0.01,0.05,0.1,0.5,1,5))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
61 V_Segments = c(".*", "IGHV", "IGHD", "IGKV", "IGKV", "IgKINTR", "TRGV", "TRDV", "TRDD" , "TRBV")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
62 J_Segments = c(".*", ".*", ".*", "IGKJ", "KDE", ".*", ".*", ".*", ".*", ".*")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
63 Titles = c("Total", "IGH-Vh-Jh", "IGH-Dh-Jh", "Vk-Jk", "Vk-Kde" , "Intron-Kde", "TCRG", "TCRD-Vd-Dd", "TCRD-Dd-Dd", "TCRB-Vb-Jb")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
64 Titles = factor(Titles, levels=Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
65 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
66
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
67 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
68
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
69 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
70 patient.merge.list.second = list()
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
71 scatter_locus_data_list = list()
56
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
72 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="multiple_matches.html", append=T)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
73 cat(paste("<table border='0' style='font-family:courier;'>", sep=""), file="single_matches.html", append=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
74 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
75 if (!is.data.frame(x) & is.list(x)){
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
76 x = x[[1]]
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
77 }
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
78 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
79 x = data.frame(x,stringsAsFactors=F)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
80 onShort = "reads"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
81 if(on == "Frequency"){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
82 onShort = "freq"
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
83 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
84 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
85 ony = paste(on, ".y", sep="")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
86 splt = split(x, x$Sample, drop=T)
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
87 type="pair"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
88 if(length(splt) == 1){
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
89 print(paste(paste(x[1,which(colnames(x) == "Patient")]), "has one sample"))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
90 splt[[2]] = data.frame("Patient" = character(0), "Receptor" = character(0), "Sample" = character(0), "Cell_Count" = numeric(0), "Clone_Molecule_Count_From_Spikes" = numeric(0), "Log10_Frequency" = numeric(0), "Total_Read_Count" = numeric(0), "dsMol_per_1e6_cells" = numeric(0), "J_Segment_Major_Gene" = character(0), "V_Segment_Major_Gene" = character(0), "Clone_Sequence" = character(0), "CDR3_Sense_Sequence" = character(0), "Related_to_leukemia_clone" = logical(0), "Frequency"= numeric(0), "normalized_read_count" = numeric(0), "paste" = character(0))
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
91 type="single"
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
92 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
93 patient1 = splt[[1]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
94 patient2 = splt[[2]]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
95
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
96 threshholdIndex = which(colnames(product) == "interval")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
97 V_SegmentIndex = which(colnames(product) == "V_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
98 J_SegmentIndex = which(colnames(product) == "J_Segments")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
99 titleIndex = which(colnames(product) == "Titles")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
100 sampleIndex = which(colnames(x) == "Sample")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
101 patientIndex = which(colnames(x) == "Patient")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
102 oneSample = paste(patient1[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
103 twoSample = paste(patient2[1,sampleIndex], sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
104 patient = paste(x[1,patientIndex])
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
105
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
106 switched = F
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
107 if(length(grep(".*_Right$", twoSample)) == 1 || length(grep(".*_Dx_BM$", twoSample)) == 1 || length(grep(".*_Dx$", twoSample)) == 1 ){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
108 tmp = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
109 twoSample = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
110 oneSample = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
111 tmp = patient1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
112 patient1 = patient2
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
113 patient2 = tmp
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
114 switched = T
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
115 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
116 if(appendtxt){
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
117 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
118 }
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
119 cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
120
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
121 if(mergeOn == "Clone_Sequence"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
122 patient1$merge = paste(patient1$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
123 patient2$merge = paste(patient2$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
124 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
125 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
126 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
127 }
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
128
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
129 scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
130 #scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
131 scatterplot_data = patient1[NULL,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
132 #scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
133 #scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
134 scatterplot.data.type.factor = c(oneSample, twoSample, paste(c(oneSample, twoSample), "In Both"))
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
135 #scatterplot_data$type = factor(x=NULL, levels=scatterplot.data.type.factor)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
136 scatterplot_data$type = character(0)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
137 scatterplot_data$link = numeric(0)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
138 scatterplot_data$on = character(0)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
139
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
140 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy'
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
141 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
142
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
143 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
144
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
145 start.time = proc.time()
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
146 merge.list = c()
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
147
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
148 if(patient %in% names(patient.merge.list)){
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
149 patientMerge = patient.merge.list[[patient]]
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
150 merge.list[["second"]] = patient.merge.list.second[[patient]]
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
151 scatterplot_data = scatter_locus_data_list[[patient]]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
152 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
153
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
154 print(names(patient.merge.list))
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
155 } else {
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
156 #fuzzy matching here...
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
157 #merge.list = patientMerge$merge
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
158
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
159 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
160 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
161
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
162 patient1.fuzzy = patient1
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
163 patient2.fuzzy = patient2
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
164
36
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
165 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
d592dab2fca1 Uploaded
davidvanzessen
parents: 35
diff changeset
166 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
167
48
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
168 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
169 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
170
48
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
171 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
172 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
173
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
174 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove?
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
175 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
176
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
177 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
178 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
179
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
180 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
181 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
182
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
183 merge.list = list()
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
184
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
185 merge.list[["second"]] = vector()
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
186
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
187 link.count = 1
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
188
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
189 while(nrow(patient.fuzzy) > 1){
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
190 first.merge = patient.fuzzy[1,"merge"]
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
191 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
192 first.sample = patient.fuzzy[1,"Sample"]
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
193 merge.filter = first.merge == patient.fuzzy$merge
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
194
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
195 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
196
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
197 first.sample.filter = first.sample == patient.fuzzy$Sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
198 second.sample.filter = first.sample != patient.fuzzy$Sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
199
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
200 #first match same sample, sum to a single row, same for other sample
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
201 #then merge rows like 'normal'
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
202
48
1b5b862b055b Uploaded
davidvanzessen
parents: 47
diff changeset
203 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
204
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
205
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
206
44
07278582b735 Uploaded
davidvanzessen
parents: 43
diff changeset
207 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
208 first.match.filter = merge.filter & sequence.filter & first.sample.filter
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
209 second.match.filter = merge.filter & sequence.filter & second.sample.filter
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
210
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
211 first.rows = patient.fuzzy[first.match.filter,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
212 second.rows = patient.fuzzy[second.match.filter,]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
213
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
214 first.rows.v = table(first.rows$V_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
215 first.rows.v = names(first.rows.v[which.max(first.rows.v)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
216 first.rows.j = table(first.rows$J_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
217 first.rows.j = names(first.rows.j[which.max(first.rows.j)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
218
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
219 first.sum = data.frame(merge = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
220 Patient = patient,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
221 Receptor = first.rows[1,"Receptor"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
222 Sample = first.rows[1,"Sample"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
223 Cell_Count = first.rows[1,"Cell_Count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
224 Clone_Molecule_Count_From_Spikes = sum(first.rows$Clone_Molecule_Count_From_Spikes),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
225 Log10_Frequency = log10(sum(first.rows$Frequency)),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
226 Total_Read_Count = sum(first.rows$Total_Read_Count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
227 dsPerM = sum(first.rows$dsPerM),
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
228 J_Segment_Major_Gene = first.rows.j,
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
229 V_Segment_Major_Gene = first.rows.v,
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
230 Clone_Sequence = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
231 CDR3_Sense_Sequence = first.rows[1,"CDR3_Sense_Sequence"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
232 Related_to_leukemia_clone = F,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
233 Frequency = sum(first.rows$Frequency),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
234 locus_V = first.rows[1,"locus_V"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
235 locus_J = first.rows[1,"locus_J"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
236 min_cell_count = first.rows[1,"min_cell_count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
237 normalized_read_count = sum(first.rows$normalized_read_count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
238 paste = first.rows[1,"paste"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
239 min_cell_paste = first.rows[1,"min_cell_paste"])
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
240
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
241 if(nrow(second.rows) > 0){
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
242 second.rows.v = table(second.rows$V_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
243 second.rows.v = names(second.rows.v[which.max(second.rows.v)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
244 second.rows.j = table(second.rows$J_Segment_Major_Gene)
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
245 second.rows.j = names(second.rows.j[which.max(second.rows.j)])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
246
55
43ce3ebfc9b5 Uploaded
davidvanzessen
parents: 54
diff changeset
247 second.sum = data.frame(merge = first.clone.sequence,
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
248 Patient = patient,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
249 Receptor = second.rows[1,"Receptor"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
250 Sample = second.rows[1,"Sample"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
251 Cell_Count = second.rows[1,"Cell_Count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
252 Clone_Molecule_Count_From_Spikes = sum(second.rows$Clone_Molecule_Count_From_Spikes),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
253 Log10_Frequency = log10(sum(second.rows$Frequency)),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
254 Total_Read_Count = sum(second.rows$Total_Read_Count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
255 dsPerM = sum(second.rows$dsPerM),
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
256 J_Segment_Major_Gene = second.rows.j,
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
257 V_Segment_Major_Gene = second.rows.v,
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
258 Clone_Sequence = first.clone.sequence,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
259 CDR3_Sense_Sequence = second.rows[1,"CDR3_Sense_Sequence"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
260 Related_to_leukemia_clone = F,
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
261 Frequency = sum(second.rows$Frequency),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
262 locus_V = second.rows[1,"locus_V"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
263 locus_J = second.rows[1,"locus_J"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
264 min_cell_count = second.rows[1,"min_cell_count"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
265 normalized_read_count = sum(second.rows$normalized_read_count),
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
266 paste = second.rows[1,"paste"],
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
267 min_cell_paste = second.rows[1,"min_cell_paste"])
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
268
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
269 patientMerge = rbind(patientMerge, merge(first.sum, second.sum, by="merge"))
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
270 patient.fuzzy = patient.fuzzy[!(first.match.filter | second.match.filter),]
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
271
50
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
272 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"], second.rows[second.rows$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
7dd7cefcf72d Uploaded
davidvanzessen
parents: 49
diff changeset
273 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
274
56
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
275 tmp.rows = rbind(first.rows, second.rows)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
276 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
277
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
278
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
279 #add to the scatterplot data
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
280 scatterplot.row = first.sum[,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
281 scatterplot.row$type = paste(first.sum[,"Sample"], "In Both")
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
282 scatterplot.row$link = link.count
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
283 scatterplot.row$on = onShort
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
284
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
285 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
286
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
287 scatterplot.row = second.sum[,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
288 scatterplot.row$type = paste(second.sum[,"Sample"], "In Both")
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
289 scatterplot.row$link = link.count
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
290 scatterplot.row$on = onShort
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
291
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
292 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
293
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
294 #write some information about the match to a log file
56
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
295 if (nrow(first.rows) > 1 | nrow(second.rows) > 1) {
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
296 cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
297 } else {
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
298 second.clone.sequence = second.rows[1,"Clone_Sequence"]
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
299 if(nchar(first.clone.sequence) != nchar(second.clone.sequence)){
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
300 cat(paste("<tr bgcolor='#DDD'><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
c89d9e89423b Uploaded
davidvanzessen
parents: 55
diff changeset
301 } else {
58
3ed7878f75c3 Uploaded
davidvanzessen
parents: 56
diff changeset
302 #cat(paste("<tr><td>", patient, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
303 }
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
304 }
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
305
59
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
306 } else if(nrow(first.rows) > 1) {
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
307 if(patient1[1,"Sample"] == first.sample){
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
308 patient1 = patient1[!(patient1$Clone_Sequence %in% first.rows$Clone_Sequence),]
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
309 patient1 = rbind(patient1, first.sum)
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
310 } else {
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
311 patient2 = patient2[!(patient2$Clone_Sequence %in% first.rows$Clone_Sequence),]
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
312 patient2 = rbind(patient2, first.sum)
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
313 }
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
314
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
315 hidden.clone.sequences = c(first.rows[-1,"Clone_Sequence"])
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
316 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
317
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
318 patient.fuzzy = patient.fuzzy[-first.match.filter,]
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
319
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
320 #add to the scatterplot data
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
321 scatterplot.row = first.sum[,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
322 scatterplot.row$type = first.sum[,"Sample"]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
323 scatterplot.row$link = link.count
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
324 scatterplot.row$on = onShort
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
325
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
326 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
59
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
327
36e307208f1b Uploaded
davidvanzessen
parents: 58
diff changeset
328 cat(paste("<tr bgcolor='#DDF'><td>", patient, " row ", 1:nrow(first.rows), "</td><td>", first.rows$Sample, ":</td><td>", first.rows$Clone_Sequence, "</td><td>", first.rows$normalized_read_count, "</td></tr>", sep=""), file="single_matches.html", append=T)
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
329 } else {
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
330 patient.fuzzy = patient.fuzzy[-1,]
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
331
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
332 #add to the scatterplot data
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
333 scatterplot.row = first.sum[,scatterplot_data_columns]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
334 scatterplot.row$type = first.sum[,"Sample"]
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
335 scatterplot.row$link = link.count
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
336 scatterplot.row$on = onShort
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
337
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
338 scatterplot_data = rbind(scatterplot_data, scatterplot.row)
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
339 }
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
340 link.count = link.count + 1
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
341 }
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
342 patient.merge.list[[patient]] <<- patientMerge
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
343 patient.merge.list.second[[patient]] <<- merge.list[["second"]]
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
344 scatter_locus_data_list[[patient]] <<- scatterplot_data
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
345 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
34
37d9074ef2c6 Uploaded
davidvanzessen
parents: 33
diff changeset
346 }
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
347
52
c5c2a790d476 Uploaded
davidvanzessen
parents: 51
diff changeset
348 patient1 = patient1[!(patient1$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
c5c2a790d476 Uploaded
davidvanzessen
parents: 51
diff changeset
349 patient2 = patient2[!(patient2$Clone_Sequence %in% patient.merge.list.second[[patient]]),]
51
17e677c72e49 Uploaded
davidvanzessen
parents: 50
diff changeset
350
37
623bbe972363 Uploaded
davidvanzessen
parents: 36
diff changeset
351
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
352 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
353 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony])
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
354 res1 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
355 res2 = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
356 resBoth = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
357 read1Count = vector()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
358 read2Count = vector()
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
359 locussum1 = vector()
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
360 locussum2 = vector()
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
361
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
362 #for(iter in 1){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
363 for(iter in 1:length(product[,1])){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
364 threshhold = product[iter,threshholdIndex]
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
365 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
366 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
367 #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
368 both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
369 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
370 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
14
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
371 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
1735e91a8f4b Uploaded
davidvanzessen
parents: 13
diff changeset
372 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
373 res1 = append(res1, sum(one))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
374 res2 = append(res2, sum(two))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
375 resBoth = append(resBoth, sum(both))
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
376 locussum1 = append(locussum1, sum(patient1[(grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene)),]$normalized_read_count))
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
377 locussum2 = append(locussum2, sum(patient2[(grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene)),]$normalized_read_count))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
378 #threshhold = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
379 if(threshhold != 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
380 if(sum(one) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
381 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
382 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
383 filenameOne = paste(oneSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
384 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
385 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
386 if(sum(two) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
387 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
388 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone Sequence", "Related_to_leukemia_clone")
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
389 filenameTwo = paste(twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
390 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
391 }
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
392 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
393 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
49
7658e9f3d416 Uploaded
davidvanzessen
parents: 48
diff changeset
394 #scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[[twoSample]]),]
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
395 #scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
396 if(nrow(scatterplot_locus_data) > 0){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
397 scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
398 }
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
399
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
400
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
401
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
402 #in_one = (scatterplot_locus_data$merge %in% patient1$merge)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
403 #in_two = (scatterplot_locus_data$merge %in% patient2$merge)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
404 #if(any(in_two)){
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
405 # scatterplot_locus_data[in_two,]$type = twoSample
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
406 #}
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
407 #in_both = (scatterplot_locus_data$merge %in% patientMerge$merge)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
408 ##merge.list.filter = (scatterplot_locus_data$merge %in% merge.list[[oneSample]])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
409 ##exact.matches.filter = (scatterplot_locus_data$merge %in% cs.exact.matches)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
410 #if(any(in_both)){
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
411 # scatterplot_locus_data[in_both,]$type = "In Both"
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
412 #}
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
413 #if(type == "single" & (nrow(scatterplot_locus_data) > 0 | !any(scatterplot_locus_data$Patient %in% single_patients$Patient))){
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
414 # single_patients <<- rbind(single_patients, scatterplot_locus_data)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
415 #}
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
416
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
417
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
418 p = NULL
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
419 print(paste("nrow scatterplot_locus_data", nrow(scatterplot_locus_data)))
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
420 if(nrow(scatterplot_locus_data) != 0){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
421 if(on == "normalized_read_count"){
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
422 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
423 #p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=10^6) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
424 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count, group=link)) + geom_line() + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,10^6)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
425 } else {
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
426 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
427 p = ggplot(scatterplot_locus_data, aes(type, Frequency, group=link)) + geom_line() + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + scale_x_discrete(breaks=levels(scatterplot_data$type), labels=levels(scatterplot_data$type), drop=FALSE)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
428 }
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
429 #p = p + geom_point(aes(colour=type), position="jitter")
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
430 p = p + geom_point(aes(colour=type), position="dodge")
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
431 p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
432 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
433 p = ggplot(NULL, aes(x=c("In one", "In Both"),y=0)) + geom_blank(NULL) + xlab("In one or both of the samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
434 }
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
435 png(paste(patient1[1,patientIndex], "_", patient1[1,sampleIndex], "_", patient2[1,sampleIndex], "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
436 print(p)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
437 dev.off()
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
438 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
439 if(sum(both) > 0){
15
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
440 dfBoth = patientMerge[both,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d137974763b3 Uploaded
davidvanzessen
parents: 14
diff changeset
441 colnames(dfBoth) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
442 filenameBoth = paste(oneSample, "_", twoSample, "_", product[iter, titleIndex], "_", threshhold, sep="")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
443 write.table(dfBoth, file=paste(filenameBoth, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
444 }
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
445 }
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
446 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "Both"=resBoth, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "Sum"=res1 + res2 + resBoth, "percentage" = round((resBoth/(res1 + res2 + resBoth)) * 100, digits=2), "Locus_sum1"=locussum1, "Locus_sum2"=locussum2)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
447 if(sum(is.na(patientResult$percentage)) > 0){
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
448 patientResult[is.na(patientResult$percentage),]$percentage = 0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
449 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
450 colnames(patientResult)[6] = oneSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
451 colnames(patientResult)[8] = twoSample
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
452 colnamesBak = colnames(patientResult)
2
8d562506f4f9 Uploaded
davidvanzessen
parents: 0
diff changeset
453 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", paste("Number of sequences ", patient, "_Both", sep=""), paste("Number of sequences", oneSample, sep=""), paste("Normalized Read Count", oneSample), paste("Number of sequences", twoSample, sep=""), paste("Normalized Read Count", twoSample), paste("Sum number of sequences", patient), paste("Percentage of sequences ", patient, "_Both", sep=""), paste("Locus Sum", oneSample), paste("Locus Sum", twoSample))
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
454 write.table(patientResult, file=paste(patient, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
455 colnames(patientResult) = colnamesBak
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
456
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
457 patientResult$Locus = factor(patientResult$Locus, Titles)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
458 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
459
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
460 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "Both")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
461 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=Both), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
462 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
463 plt = plt + geom_text(aes(ymax=max(Both), x=cut_off_value,y=Both,label=Both), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
464 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in both")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
465 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
466 png(paste(patient, "_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
467 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
468 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
469 #(t,r,b,l)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
470 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "percentage")])
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
471 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=percentage), stat='identity', position="dodge", fill="#79c36a")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
472 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
473 plt = plt + geom_text(aes(ymax=max(percentage), x=cut_off_value,y=percentage,label=percentage), angle=90, hjust=0)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
474 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("% clones in both left and right")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
475 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
476 png(paste(patient, "_percent_", onShort, ".png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
477 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
478 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
479
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
480 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample)] ,id.vars=1:2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
481 patientResult$relativeValue = patientResult$value * 10
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
482 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
483 plt = ggplot(patientResult)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
484 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
485 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
486 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
487 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.2)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
488 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.8)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
489 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle(paste("Number of clones in only ", oneSample, " and only ", twoSample, sep=""))
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
490 png(paste(patient, "_", onShort, "_both.png", sep=""), width=1920, height=1080)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
491 print(plt)
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
492 dev.off()
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
493 }
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
494
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
495 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
496
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
497 interval = intervalFreq
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
498 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
499 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
500 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
501
3
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
502 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
f9316f7676cc Uploaded
davidvanzessen
parents: 2
diff changeset
503
0
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
504 interval = intervalReads
c5ac9a871b26 Uploaded
davidvanzessen
parents:
diff changeset
505 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
4
f11df36f43bb Uploaded
davidvanzessen
parents: 3
diff changeset
506 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
507 lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
508
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
509 if(nrow(single_patients) > 0){
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
510 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
511 p = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000))
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
512 p = p + geom_point(aes(colour=type), position="jitter")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
513 p = p + xlab("In one or both samples") + ylab("Reads")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
514 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the reads of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
515 png("singles_reads_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
516 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
517 dev.off()
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
518
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
519 #p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
520 p = ggplot(single_patients, aes(Rearrangement, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
521 p = p + geom_point(aes(colour=type), position="jitter")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
522 p = p + xlab("In one or both samples") + ylab("Frequency")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
523 p = p + facet_grid(.~Patient) + ggtitle("Scatterplot of the frequency of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
524 png("singles_freq_scatterplot.png", width=640 * length(unique(single_patients$Patient)) + 100, height=1080)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
525 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
526 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
527 } else {
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
528 empty <- data.frame()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
529 p = ggplot(empty) + geom_point() + xlim(0, 10) + ylim(0, 100) + xlab("In one or both samples") + ylab("Frequency") + ggtitle("Scatterplot of the frequency of the patients with a single sample")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
530
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
531 png("singles_reads_scatterplot.png", width=400, height=300)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
532 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
533 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
534
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
535 png("singles_freq_scatterplot.png", width=400, height=300)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
536 print(p)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
537 dev.off()
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
538 }
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
539
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
540 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
541 patient.merge.list.second = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
542
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
543 tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
544 onShort = "reads"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
545 if(on == "Frequency"){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
546 onShort = "freq"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
547 }
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
548 onx = paste(on, ".x", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
549 ony = paste(on, ".y", sep="")
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
550 onz = paste(on, ".z", sep="")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
551 type="triplet"
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
552
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
553 threshholdIndex = which(colnames(product) == "interval")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
554 V_SegmentIndex = which(colnames(product) == "V_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
555 J_SegmentIndex = which(colnames(product) == "J_Segments")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
556 titleIndex = which(colnames(product) == "Titles")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
557 sampleIndex = which(colnames(patient1) == "Sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
558 patientIndex = which(colnames(patient1) == "Patient")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
559 oneSample = paste(patient1[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
560 twoSample = paste(patient2[1,sampleIndex], sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
561 threeSample = paste(patient3[1,sampleIndex], sep="")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
562
29
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
563 if(mergeOn == "Clone_Sequence"){
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
564 patient1$merge = paste(patient1$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
565 patient2$merge = paste(patient2$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
566 patient3$merge = paste(patient3$Clone_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
567
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
568 } else {
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
569 patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
570 patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
571 patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
5ab17bdf2530 Uploaded
davidvanzessen
parents: 28
diff changeset
572 }
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
573
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
574 #patientMerge = merge(patient1, patient2, by="merge")[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
575 patient1.fuzzy = patient1
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
576 patient2.fuzzy = patient2
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
577 patient3.fuzzy = patient3
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
578
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
579 cat(paste("<tr><td>", label1, "</td>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
580
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
581 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
582 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
583 patient3.fuzzy$merge = paste(patient3.fuzzy$locus_V, patient3.fuzzy$locus_J)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
584
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
585 patient.fuzzy = rbind(patient1.fuzzy ,patient2.fuzzy, patient3.fuzzy)
65
7a63e90cfc3d Uploaded
davidvanzessen
parents: 64
diff changeset
586 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
587
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
588 other.sample.list = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
589 other.sample.list[[oneSample]] = c(twoSample, threeSample)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
590 other.sample.list[[twoSample]] = c(oneSample, threeSample)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
591 other.sample.list[[threeSample]] = c(oneSample, twoSample)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
592
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
593 patientMerge = merge(patient1, patient2, by="merge")
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
594 patientMerge = merge(patientMerge, patient3, by="merge")
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
595 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
596 #patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
597 patientMerge = patientMerge[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
598
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
599 duo.merge.list = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
600
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
601 patientMerge12 = merge(patient1, patient2, by="merge")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
602 #patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
603 patientMerge12 = patientMerge12[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
604 duo.merge.list[[paste(oneSample, twoSample)]] = patientMerge12
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
605 duo.merge.list[[paste(twoSample, oneSample)]] = patientMerge12
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
606
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
607 patientMerge13 = merge(patient1, patient3, by="merge")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
608 #patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
609 patientMerge13 = patientMerge13[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
610 duo.merge.list[[paste(oneSample, threeSample)]] = patientMerge13
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
611 duo.merge.list[[paste(threeSample, oneSample)]] = patientMerge13
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
612
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
613 patientMerge23 = merge(patient2, patient3, by="merge")
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
614 #patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
615 patientMerge23 = patientMerge23[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
616 duo.merge.list[[paste(twoSample, threeSample)]] = patientMerge23
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
617 duo.merge.list[[paste(threeSample, twoSample)]] = patientMerge23
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
618
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
619 merge.list = list()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
620 merge.list[["second"]] = vector()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
621
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
622 start.time = proc.time()
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
623 if(paste(label1, "123") %in% names(patient.merge.list)){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
624 patientMerge = patient.merge.list[[paste(label1, "123")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
625 patientMerge12 = patient.merge.list[[paste(label1, "12")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
626 patientMerge13 = patient.merge.list[[paste(label1, "13")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
627 patientMerge23 = patient.merge.list[[paste(label1, "23")]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
628
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
629 merge.list[["second"]] = patient.merge.list.second[[label1]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
630
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
631 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
632 } else {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
633 while(nrow(patient.fuzzy) > 0){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
634 first.merge = patient.fuzzy[1,"merge"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
635 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
636 first.sample = patient.fuzzy[1,"Sample"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
637
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
638 merge.filter = first.merge == patient.fuzzy$merge
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
639
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
640 second.sample = other.sample.list[[first.sample]][1]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
641 third.sample = other.sample.list[[first.sample]][2]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
642
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
643 sample.filter.1 = first.sample == patient.fuzzy$Sample
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
644 sample.filter.2 = second.sample == patient.fuzzy$Sample
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
645 sample.filter.3 = third.sample == patient.fuzzy$Sample
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
646
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
647 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
648
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
649 match.filter.1 = sample.filter.1 & sequence.filter & merge.filter
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
650 match.filter.2 = sample.filter.2 & sequence.filter & merge.filter
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
651 match.filter.3 = sample.filter.3 & sequence.filter & merge.filter
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
652
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
653 matches.in.1 = any(match.filter.1)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
654 matches.in.2 = any(match.filter.2)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
655 matches.in.3 = any(match.filter.3)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
656
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
657
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
658
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
659 rows.1 = patient.fuzzy[match.filter.1,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
660
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
661 sum.1 = data.frame(merge = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
662 Patient = label1,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
663 Receptor = rows.1[1,"Receptor"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
664 Sample = rows.1[1,"Sample"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
665 Cell_Count = rows.1[1,"Cell_Count"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
666 Clone_Molecule_Count_From_Spikes = sum(rows.1$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
667 Log10_Frequency = log10(sum(rows.1$Frequency)),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
668 Total_Read_Count = sum(rows.1$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
669 dsPerM = sum(rows.1$dsPerM),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
670 J_Segment_Major_Gene = rows.1[1,"J_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
671 V_Segment_Major_Gene = rows.1[1,"V_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
672 Clone_Sequence = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
673 CDR3_Sense_Sequence = rows.1[1,"CDR3_Sense_Sequence"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
674 Related_to_leukemia_clone = F,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
675 Frequency = sum(rows.1$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
676 locus_V = rows.1[1,"locus_V"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
677 locus_J = rows.1[1,"locus_J"],
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
678 uniqueID = rows.1[1,"uniqueID"],
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
679 normalized_read_count = sum(rows.1$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
680 sum.2 = sum.1[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
681 rows.2 = patient.fuzzy[match.filter.2,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
682 if(matches.in.2){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
683 sum.2 = data.frame(merge = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
684 Patient = label1,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
685 Receptor = rows.2[1,"Receptor"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
686 Sample = rows.2[1,"Sample"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
687 Cell_Count = rows.2[1,"Cell_Count"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
688 Clone_Molecule_Count_From_Spikes = sum(rows.2$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
689 Log10_Frequency = log10(sum(rows.2$Frequency)),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
690 Total_Read_Count = sum(rows.2$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
691 dsPerM = sum(rows.2$dsPerM),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
692 J_Segment_Major_Gene = rows.2[1,"J_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
693 V_Segment_Major_Gene = rows.2[1,"V_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
694 Clone_Sequence = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
695 CDR3_Sense_Sequence = rows.2[1,"CDR3_Sense_Sequence"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
696 Related_to_leukemia_clone = F,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
697 Frequency = sum(rows.2$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
698 locus_V = rows.2[1,"locus_V"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
699 locus_J = rows.2[1,"locus_J"],
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
700 uniqueID = rows.2[1,"uniqueID"],
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
701 normalized_read_count = sum(rows.2$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
702 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
703
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
704 sum.3 = sum.1[NULL,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
705 rows.3 = patient.fuzzy[match.filter.3,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
706 if(matches.in.3){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
707 sum.3 = data.frame(merge = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
708 Patient = label1,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
709 Receptor = rows.3[1,"Receptor"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
710 Sample = rows.3[1,"Sample"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
711 Cell_Count = rows.3[1,"Cell_Count"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
712 Clone_Molecule_Count_From_Spikes = sum(rows.3$Clone_Molecule_Count_From_Spikes),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
713 Log10_Frequency = log10(sum(rows.3$Frequency)),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
714 Total_Read_Count = sum(rows.3$Total_Read_Count),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
715 dsPerM = sum(rows.3$dsPerM),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
716 J_Segment_Major_Gene = rows.3[1,"J_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
717 V_Segment_Major_Gene = rows.3[1,"V_Segment_Major_Gene"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
718 Clone_Sequence = first.clone.sequence,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
719 CDR3_Sense_Sequence = rows.3[1,"CDR3_Sense_Sequence"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
720 Related_to_leukemia_clone = F,
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
721 Frequency = sum(rows.3$Frequency),
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
722 locus_V = rows.3[1,"locus_V"],
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
723 locus_J = rows.3[1,"locus_J"],
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
724 uniqueID = rows.3[1,"uniqueID"],
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
725 normalized_read_count = sum(rows.3$normalized_read_count))
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
726 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
727
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
728 if(matches.in.2 & matches.in.3){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
729 merge.123 = merge(sum.1, sum.2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
730 merge.123 = merge(merge.123, sum.3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
731 colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123)))] = paste(colnames(merge.123)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(merge.123), perl=T))], ".z", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
732 #merge.123$thresholdValue = pmax(merge.123[,onx], merge.123[,ony], merge.123[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
733
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
734 patientMerge = rbind(patientMerge, merge.123)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
735 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2 | match.filter.3),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
736
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
737 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
738 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
739
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
740 } else if (matches.in.2) {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
741 #other.sample1 = other.sample.list[[first.sample]][1]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
742 #other.sample2 = other.sample.list[[first.sample]][2]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
743
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
744 second.sample = sum.2[,"Sample"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
745
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
746 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
747
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
748 merge.12 = merge(sum.1, sum.2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
749
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
750 current.merge.list = rbind(current.merge.list, merge.12)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
751 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
752
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
753 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.2),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
754
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
755 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.2[rows.2$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
756 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
757
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
758 } else if (matches.in.3) {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
759
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
760 #other.sample1 = other.sample.list[[first.sample]][1]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
761 #other.sample2 = other.sample.list[[first.sample]][2]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
762
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
763 second.sample = sum.3[,"Sample"]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
764
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
765 current.merge.list = duo.merge.list[[paste(first.sample, second.sample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
766
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
767 merge.13 = merge(sum.1, sum.3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
768
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
769 current.merge.list = rbind(current.merge.list, merge.13)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
770 duo.merge.list[[paste(first.sample, second.sample)]] = current.merge.list
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
771
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
772 patient.fuzzy = patient.fuzzy[!(match.filter.1 | match.filter.3),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
773
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
774 hidden.clone.sequences = c(rows.1[-1,"Clone_Sequence"], rows.3[rows.3$Clone_Sequence != first.clone.sequence,"Clone_Sequence"])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
775 merge.list[["second"]] = append(merge.list[["second"]], hidden.clone.sequences)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
776
63
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
777 } else if(nrow(rows.1) > 1){
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
778 patient1 = patient1[!(patient1$Clone_Sequence %in% rows.1$Clone_Sequence),]
64
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
779 print(names(patient1)[names(patient1) %in% sum.1])
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
780 print(names(patient1)[!(names(patient1) %in% sum.1)])
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
781 print(names(patient1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
782 print(names(sum.1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
783 print(summary(sum.1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
784 print(summary(patient1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
785 print(dim(sum.1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
786 print(dim(patient1))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
787 print(head(sum.1[,names(patient1)]))
39fff0180d41 Uploaded
davidvanzessen
parents: 63
diff changeset
788 patient1 = rbind(patient1, sum.1[,names(patient1)])
63
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
789 patient.fuzzy = patient.fuzzy[-match.filter.1,]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
790 } else {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
791 patient.fuzzy = patient.fuzzy[-1,]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
792 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
793
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
794 tmp.rows = rbind(rows.1, rows.2, rows.3)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
795 tmp.rows = tmp.rows[order(nchar(tmp.rows$Clone_Sequence)),]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
796
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
797 if (sum(match.filter.1) > 1 | sum(match.filter.2) > 1 | sum(match.filter.1) > 1) {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
798 cat(paste("<tr><td>", label1, " row ", 1:nrow(tmp.rows), "</td><td>", tmp.rows$Sample, ":</td><td>", tmp.rows$Clone_Sequence, "</td><td>", tmp.rows$normalized_read_count, "</td></tr>", sep=""), file="multiple_matches.html", append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
799 } else {
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
800 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
801
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
802 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
803 patient.merge.list[[paste(label1, "123")]] = patientMerge
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
804
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
805 patientMerge12 = duo.merge.list[[paste(oneSample, twoSample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
806 patientMerge13 = duo.merge.list[[paste(oneSample, threeSample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
807 patientMerge23 = duo.merge.list[[paste(twoSample, threeSample)]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
808
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
809 patient.merge.list[[paste(label1, "12")]] = patientMerge12
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
810 patient.merge.list[[paste(label1, "13")]] = patientMerge13
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
811 patient.merge.list[[paste(label1, "23")]] = patientMerge23
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
812
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
813 patient.merge.list.second[[label1]] = merge.list[["second"]]
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
814 }
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
815 cat(paste("<td>", nrow(patient1), " in ", label1, " and ", nrow(patient2), " in ", label2, nrow(patient3), " in ", label3, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
816 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
817 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
818 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
819 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
820
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
821 #patientMerge$thresholdValue = pmin(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
822 #patientMerge12$thresholdValue = pmin(patientMerge12[,onx], patientMerge12[,ony])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
823 #patientMerge13$thresholdValue = pmin(patientMerge13[,onx], patientMerge13[,ony])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
824 #patientMerge23$thresholdValue = pmin(patientMerge23[,onx], patientMerge23[,ony])
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
825
63
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
826 patient1 = patient1[!(patient1$Clone_Sequence %in% merge.list[["second"]]),]
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
827 patient2 = patient2[!(patient2$Clone_Sequence %in% merge.list[["second"]]),]
39ea37070e90 Uploaded
davidvanzessen
parents: 62
diff changeset
828 patient3 = patient3[!(patient3$Clone_Sequence %in% merge.list[["second"]]),]
62
b1ad6f515338 Uploaded
davidvanzessen
parents: 61
diff changeset
829
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
830 if(F){
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
831 patientMerge = merge(patient1, patient2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
832 patientMerge = merge(patientMerge, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
833 colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge)))] = paste(colnames(patientMerge)[which(!grepl("(\\.x$)|(\\.y$)|(merge)", names(patientMerge), perl=T))], ".z", sep="")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
834 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony], patientMerge[,onz])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
835 patientMerge12 = merge(patient1, patient2, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
836 patientMerge12$thresholdValue = pmax(patientMerge12[,onx], patientMerge12[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
837 patientMerge13 = merge(patient1, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
838 patientMerge13$thresholdValue = pmax(patientMerge13[,onx], patientMerge13[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
839 patientMerge23 = merge(patient2, patient3, by="merge")
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
840 patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
841 }
28
a63ccc36f5a4 Uploaded
davidvanzessen
parents: 27
diff changeset
842
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
843 scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
844 scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
845 scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
846 scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
847
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
848 res1 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
849 res2 = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
850 res3 = vector()
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
851 res12 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
852 res13 = vector()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
853 res23 = vector()
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
854 resAll = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
855 read1Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
856 read2Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
857 read3Count = vector()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
858
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
859 if(appendTriplets){
9
58a28427930e Uploaded
davidvanzessen
parents: 8
diff changeset
860 cat(paste(label1, label2, label3, sep="\t"), file="triplets.txt", append=T, sep="", fill=3)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
861 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
862 for(iter in 1:length(product[,1])){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
863 threshhold = product[iter,threshholdIndex]
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
864 V_Segment = paste(".*", as.character(product[iter,V_SegmentIndex]), ".*", sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
865 J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
866 #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
867 all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
868
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
869 one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
870 one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
871 two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
872
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
873 one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
874 two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
875 three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
876
18
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
877 read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
878 read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
f23d3be6fbc8 Uploaded
davidvanzessen
parents: 17
diff changeset
879 read3Count = append(read3Count, sum(patient3[three,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.z))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
880 res1 = append(res1, sum(one))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
881 res2 = append(res2, sum(two))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
882 res3 = append(res3, sum(three))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
883 resAll = append(resAll, sum(all))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
884 res12 = append(res12, sum(one_two))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
885 res13 = append(res13, sum(one_three))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
886 res23 = append(res23, sum(two_three))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
887 #threshhold = 0
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
888 if(threshhold != 0){
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
889 if(sum(one) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
890 dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
891 colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
892 filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
893 write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
894 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
895 if(sum(two) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
896 dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
897 colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
898 filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
899 write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
900 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
901 if(sum(three) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
902 dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
903 colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Clone_Sequence", "Related_to_leukemia_clone")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
904 filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
905 write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
906 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
907 if(sum(one_two) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
908 dfOne_two = patientMerge12[one_two,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
909 colnames(dfOne_two) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
910 filenameOne_two = paste(label1, "_", label2, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
911 write.table(dfOne_two, file=paste(filenameOne_two, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
912 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
913 if(sum(one_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
914 dfOne_three = patientMerge13[one_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
915 colnames(dfOne_three) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
916 filenameOne_three = paste(label1, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
917 write.table(dfOne_three, file=paste(filenameOne_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
918 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
919 if(sum(two_three) > 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
920 dfTwo_three = patientMerge23[two_three,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
921 colnames(dfTwo_three) = c(paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample),"Clone_Sequence", paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
922 filenameTwo_three = paste(label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, onShort, sep="")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
923 write.table(dfTwo_three, file=paste(filenameTwo_three, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
924 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
925 } else { #scatterplot data
24
6904186d13b9 Uploaded
davidvanzessen
parents: 22
diff changeset
926 scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
927 scatterplot_locus_data = scatterplot_locus_data[!(scatterplot_locus_data$merge %in% merge.list[["second"]]),]
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
928 in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
929 if(sum(in_two) > 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
930 scatterplot_locus_data[in_two,]$type = "In two"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
931 }
30
45554fd15511 Uploaded
davidvanzessen
parents: 29
diff changeset
932 in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
27
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
933 if(sum(in_three)> 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
934 scatterplot_locus_data[in_three,]$type = "In three"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
935 }
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
936 not_in_one = scatterplot_locus_data$type != "In one"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
937 if(sum(not_in_one) > 0){
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
938 scatterplot_locus_data[not_in_one,]$type = "In multiple"
dd8518ea23dd Uploaded
davidvanzessen
parents: 26
diff changeset
939 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
940 p = NULL
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
941 if(nrow(scatterplot_locus_data) != 0){
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
942 if(on == "normalized_read_count"){
31
ce8bd23d0335 Uploaded
davidvanzessen
parents: 30
diff changeset
943 scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
32
dde5ec847549 Uploaded
davidvanzessen
parents: 31
diff changeset
944 p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
945 } else {
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
946 p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_log10(limits=c(0.0001,100), breaks=c(0.0001, 0.001, 0.01, 0.1, 1, 10, 100), labels=c("0.0001", "0.001", "0.01", "0.1", "1", "10", "100")) + expand_limits(y=c(0,100))
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
947 #p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
948 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
949 p = p + geom_point(aes(colour=type), position="jitter")
25
99020e5ce46c Uploaded
davidvanzessen
parents: 24
diff changeset
950 p = p + xlab("In one or in multiple samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
951 } else {
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
952 p = ggplot(NULL, aes(x=c("In one", "In multiple"),y=0)) + geom_blank(NULL) + xlab("In two or in three of the samples") + ylab(onShort) + ggtitle(paste(label1, label2, label3, onShort, product[iter, titleIndex]))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
953 }
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
954 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_", product[iter, titleIndex],"_scatter.png", sep=""))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
955 print(p)
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
956 dev.off()
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
957 }
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
958 if(sum(all) > 0){
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
959 dfAll = patientMerge[all,c("V_Segment_Major_Gene.x", "J_Segment_Major_Gene.x", "normalized_read_count.x", "Frequency.x", "Related_to_leukemia_clone.x", "Clone_Sequence.x", "V_Segment_Major_Gene.y", "J_Segment_Major_Gene.y", "normalized_read_count.y", "Frequency.y", "Related_to_leukemia_clone.y", "V_Segment_Major_Gene.z", "J_Segment_Major_Gene.z", "normalized_read_count.z", "Frequency.z", "Related_to_leukemia_clone.z")]
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
960 colnames(dfAll) = c(paste("Proximal segment", oneSample), paste("Distal segment", oneSample), paste("Normalized_Read_Count", oneSample), paste("Frequency", oneSample), paste("Related_to_leukemia_clone", oneSample),"Clone_Sequence", paste("Proximal segment", twoSample), paste("Distal segment", twoSample), paste("Normalized_Read_Count", twoSample), paste("Frequency", twoSample), paste("Related_to_leukemia_clone", twoSample), paste("Proximal segment", threeSample), paste("Distal segment", threeSample), paste("Normalized_Read_Count", threeSample), paste("Frequency", threeSample), paste("Related_to_leukemia_clone", threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
961 filenameAll = paste(label1, "_", label2, "_", label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
962 write.table(dfAll, file=paste(filenameAll, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
963 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
964 }
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
965 #patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "read_count1" = round(read1Count), "tmp2"=res2, "read_count2"= round(read2Count), "tmp3"=res3, "read_count3"=round(read3Count))
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
966 patientResult = data.frame("Locus"=product$Titles, "J_Segment"=product$J_Segments, "V_Segment"=product$V_Segments, "cut_off_value"=paste(">", product$interval, sep=""), "All"=resAll, "tmp1"=res1, "tmp2"=res2, "tmp3"=res3, "tmp12"=res12, "tmp13"=res13, "tmp23"=res23)
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
967 colnames(patientResult)[6] = oneSample
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
968 colnames(patientResult)[7] = twoSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
969 colnames(patientResult)[8] = threeSample
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
970 colnames(patientResult)[9] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
971 colnames(patientResult)[10] = paste(oneSample, twoSample, sep="_")
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
972 colnames(patientResult)[11] = paste(oneSample, twoSample, sep="_")
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
973
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
974 colnamesBak = colnames(patientResult)
20
d938aef60589 Uploaded
davidvanzessen
parents: 19
diff changeset
975 colnames(patientResult) = c("Ig/TCR gene rearrangement type", "Distal Gene segment", "Proximal gene segment", "cut_off_value", "Number of sequences All", paste("Number of sequences", oneSample), paste("Number of sequences", twoSample), paste("Number of sequences", threeSample), paste("Number of sequences", oneSample, twoSample), paste("Number of sequences", oneSample, threeSample), paste("Number of sequences", twoSample, threeSample))
7
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
976 write.table(patientResult, file=paste(label1, "_", label2, "_", label3, "_", onShort, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
977 colnames(patientResult) = colnamesBak
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
978
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
979 patientResult$Locus = factor(patientResult$Locus, Titles)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
980 patientResult$cut_off_value = factor(patientResult$cut_off_value, paste(">", interval, sep=""))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
981
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
982 plt = ggplot(patientResult[,c("Locus", "cut_off_value", "All")])
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
983 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=All), stat='identity', position="dodge", fill="#79c36a")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
984 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
985 plt = plt + geom_text(aes(ymax=max(All), x=cut_off_value,y=All,label=All), angle=90, hjust=0)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
986 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in All")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
987 plt = plt + theme(plot.margin = unit(c(1,8.8,0.5,1.5), "lines"))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
988 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_total_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
989 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
990 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
991
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
992 fontSize = 4
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
993
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
994 bak = patientResult
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
995 patientResult = melt(patientResult[,c('Locus','cut_off_value', oneSample, twoSample, threeSample)] ,id.vars=1:2)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
996 patientResult$relativeValue = patientResult$value * 10
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
997 patientResult[patientResult$relativeValue == 0,]$relativeValue = 1
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
998 plt = ggplot(patientResult)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
999 plt = plt + geom_bar( aes( x=factor(cut_off_value), y=relativeValue, fill=variable), stat='identity', position="dodge")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
1000 plt = plt + facet_grid(.~Locus) + theme(axis.text.x = element_text(angle = 45, hjust = 1))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
1001 plt = plt + scale_y_continuous(trans="log", breaks=10^c(0:10), labels=c(0, 10^c(0:9)))
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
1002 plt = plt + geom_text(data=patientResult[patientResult$variable == oneSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=-0.7, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
1003 plt = plt + geom_text(data=patientResult[patientResult$variable == twoSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=0.4, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
1004 plt = plt + geom_text(data=patientResult[patientResult$variable == threeSample,], aes(ymax=max(value), x=cut_off_value,y=relativeValue,label=value), angle=90, position=position_dodge(width=0.9), hjust=0, vjust=1.5, size=fontSize)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
1005 plt = plt + xlab("Reads per locus") + ylab("Count") + ggtitle("Number of clones in only one sample")
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
1006 png(paste(label1, "_", label2, "_", label3, "_", onShort, "_indiv_all.png", sep=""), width=1920, height=1080)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
1007 print(plt)
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
1008 dev.off()
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
1009 }
68c6c7624ffc Uploaded
davidvanzessen
parents: 6
diff changeset
1010
67
40c72b9ffc79 Uploaded
davidvanzessen
parents: 66
diff changeset
1011 if(F & nrow(triplets) != 0){
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1012
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1013 cat("<tr><td>Starting triplet analysis</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1014
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1015 triplets$uniqueID = "ID"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1016
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1017 triplets[grepl("16278_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1018 triplets[grepl("26402_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1019 triplets[grepl("26759_Left", triplets$Sample),]$uniqueID = "16278_26402_26759_Left"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1020
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1021 triplets[grepl("16278_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1022 triplets[grepl("26402_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1023 triplets[grepl("26759_Right", triplets$Sample),]$uniqueID = "16278_26402_26759_Right"
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1024
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1025 triplets[grepl("14696", triplets$Patient),]$uniqueID = "14696"
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1026
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1027 cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1028
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1029 triplets$locus_V = substring(triplets$V_Segment_Major_Gene, 0, 4)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1030 triplets$locus_J = substring(triplets$J_Segment_Major_Gene, 0, 4)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1031 min_cell_count = data.frame(data.table(triplets)[, list(min_cell_count=min(.SD$Cell_Count)), by=c("uniqueID", "locus_V", "locus_J")])
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1032
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1033 triplets$min_cell_paste = paste(triplets$uniqueID, triplets$locus_V, triplets$locus_J)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1034 min_cell_count$min_cell_paste = paste(min_cell_count$uniqueID, min_cell_count$locus_V, min_cell_count$locus_J)
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1035
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1036 min_cell_count = min_cell_count[,c("min_cell_paste", "min_cell_count")]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1037
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1038 triplets = merge(triplets, min_cell_count, by="min_cell_paste")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1039
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1040 triplets$normalized_read_count = round(triplets$Clone_Molecule_Count_From_Spikes / triplets$Cell_Count * triplets$min_cell_count / 2, digits=2) #??????????????????????????????????? wel of geen / 2
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1041
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1042 triplets = triplets[triplets$normalized_read_count >= min_cells,]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1043
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1044 column_drops = c("min_cell_count", "min_cell_paste")
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1045
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1046 triplets = triplets[,!(colnames(triplets) %in% column_drops)]
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1047
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1048 cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1049
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1050 interval = intervalReads
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1051 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1052 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1053
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1054 one = triplets[triplets$Sample == "14696_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1055 two = triplets[triplets$Sample == "24536_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1056 three = triplets[triplets$Sample == "24062_reg_BM",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1057 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1058
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1059 one = triplets[triplets$Sample == "16278_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1060 two = triplets[triplets$Sample == "26402_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1061 three = triplets[triplets$Sample == "26759_Left",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1062 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1063
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1064 one = triplets[triplets$Sample == "16278_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1065 two = triplets[triplets$Sample == "26402_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1066 three = triplets[triplets$Sample == "26759_Right",]
53
8ebe57feecd6 Uploaded
davidvanzessen
parents: 52
diff changeset
1067 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1068
60
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1069 cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)
28c3695259c1 Uploaded
davidvanzessen
parents: 59
diff changeset
1070
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1071 interval = intervalFreq
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1072 intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1073 product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1074
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1075 one = triplets[triplets$Sample == "14696_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1076 two = triplets[triplets$Sample == "24536_reg_BM",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1077 three = triplets[triplets$Sample == "24062_reg_BM",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1078 tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1079
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1080 one = triplets[triplets$Sample == "16278_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1081 two = triplets[triplets$Sample == "26402_Left",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1082 three = triplets[triplets$Sample == "26759_Left",]
61
77a090cd0e02 Uploaded
davidvanzessen
parents: 60
diff changeset
1083 tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1084
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1085 one = triplets[triplets$Sample == "16278_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1086 two = triplets[triplets$Sample == "26402_Right",]
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1087 three = triplets[triplets$Sample == "26759_Right",]
53
8ebe57feecd6 Uploaded
davidvanzessen
parents: 52
diff changeset
1088 tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F)
33
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1089 } else {
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1090 cat("", file="triplets.txt")
642b4593f0a4 Uploaded
davidvanzessen
parents: 32
diff changeset
1091 }
68
ef13f0a3f4d6 Uploaded
davidvanzessen
parents: 67
diff changeset
1092 cat("</table></html>", file=logfile, append=T)