Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 51:17e677c72e49 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 09 Oct 2015 06:58:17 -0400 |
parents | 7dd7cefcf72d |
children | c5c2a790d476 |
comparison
equal
deleted
inserted
replaced
50:7dd7cefcf72d | 51:17e677c72e49 |
---|---|
63 Titles = factor(Titles, levels=Titles) | 63 Titles = factor(Titles, levels=Titles) |
64 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles)) | 64 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles)) |
65 | 65 |
66 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0)) | 66 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0)) |
67 | 67 |
68 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory... | |
69 patient.merge.list.second = list() | |
70 | |
68 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){ | 71 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){ |
69 if (!is.data.frame(x) & is.list(x)){ | 72 if (!is.data.frame(x) & is.list(x)){ |
70 x = x[[1]] | 73 x = x[[1]] |
71 } | 74 } |
72 #x$Sample = factor(x$Sample, levels=unique(x$Sample)) | 75 #x$Sample = factor(x$Sample, levels=unique(x$Sample)) |
108 switched = T | 111 switched = T |
109 } | 112 } |
110 if(appendtxt){ | 113 if(appendtxt){ |
111 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3) | 114 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3) |
112 } | 115 } |
113 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T) | 116 cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T) |
114 | 117 |
115 if(mergeOn == "Clone_Sequence"){ | 118 if(mergeOn == "Clone_Sequence"){ |
116 patient1$merge = paste(patient1$Clone_Sequence) | 119 patient1$merge = paste(patient1$Clone_Sequence) |
117 patient2$merge = paste(patient2$Clone_Sequence) | 120 patient2$merge = paste(patient2$Clone_Sequence) |
118 } else { | 121 } else { |
129 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy' | 132 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy' |
130 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh | 133 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh |
131 | 134 |
132 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence | 135 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence |
133 | 136 |
134 | 137 start.time = proc.time() |
135 #fuzzy matching here... | 138 merge.list = c() |
136 if(mergeOn == "Clone_Sequence"){ | 139 |
140 if(patient %in% names(patient.merge.list)){ | |
141 patientMerge = patient.merge.list[[patient]] | |
142 merge.list[["second"]] = patient.merge.list.second[[patient]] | |
143 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T) | |
144 | |
145 print(names(patient.merge.list)) | |
146 } else { | |
147 #fuzzy matching here... | |
137 #merge.list = patientMerge$merge | 148 #merge.list = patientMerge$merge |
138 | 149 |
139 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),] | 150 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),] |
140 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),] | 151 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),] |
141 | 152 |
142 patient1.fuzzy = patient1 | 153 patient1.fuzzy = patient1 |
143 patient2.fuzzy = patient2 | 154 patient2.fuzzy = patient2 |
144 | 155 |
145 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence) | 156 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence) |
146 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) | 157 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) |
147 | 158 |
148 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) | 159 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) |
149 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) | 160 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) |
150 | 161 |
151 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J) | 162 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J) |
152 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J) | 163 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J) |
153 | 164 |
154 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove? | 165 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove? |
155 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] | 166 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] |
156 | 167 |
157 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] | 168 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] |
158 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] | 169 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] |
159 | 170 |
160 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy) | 171 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy) |
161 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),] | 172 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),] |
162 | 173 |
163 merge.list = list() | 174 merge.list = list() |
164 | 175 |
168 while(nrow(patient.fuzzy) > 1){ | 179 while(nrow(patient.fuzzy) > 1){ |
169 first.merge = patient.fuzzy[1,"merge"] | 180 first.merge = patient.fuzzy[1,"merge"] |
170 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] | 181 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] |
171 first.sample = patient.fuzzy[1,"Sample"] | 182 first.sample = patient.fuzzy[1,"Sample"] |
172 merge.filter = first.merge == patient.fuzzy$merge | 183 merge.filter = first.merge == patient.fuzzy$merge |
173 | 184 |
174 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9 | 185 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9 |
175 | 186 |
176 first.sample.filter = first.sample == patient.fuzzy$Sample | 187 first.sample.filter = first.sample == patient.fuzzy$Sample |
177 second.sample.filter = first.sample != patient.fuzzy$Sample | 188 second.sample.filter = first.sample != patient.fuzzy$Sample |
178 | 189 |
179 #first match same sample, sum to a single row, same for other sample | 190 #first match same sample, sum to a single row, same for other sample |
180 #then merge rows like 'normal' | 191 #then merge rows like 'normal' |
181 | 192 |
182 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence) | 193 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence) |
183 | 194 |
184 | 195 |
185 | 196 |
186 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter | 197 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter |
264 | 275 |
265 } else { | 276 } else { |
266 patient.fuzzy = patient.fuzzy[-1,] | 277 patient.fuzzy = patient.fuzzy[-1,] |
267 } | 278 } |
268 } | 279 } |
269 | 280 patient.merge.list[[patient]] <<- patientMerge |
270 } | 281 patient.merge.list.second[[patient]] <<- merge.list[["second"]] |
271 | 282 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T) |
283 } | |
284 | |
285 print(names(patient.merge.list)) | |
286 | |
272 | 287 |
273 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) | 288 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) |
274 res1 = vector() | 289 res1 = vector() |
275 res2 = vector() | 290 res2 = vector() |
276 resBoth = vector() | 291 resBoth = vector() |