Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 51:17e677c72e49 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Fri, 09 Oct 2015 06:58:17 -0400 |
| parents | 7dd7cefcf72d |
| children | c5c2a790d476 |
comparison
equal
deleted
inserted
replaced
| 50:7dd7cefcf72d | 51:17e677c72e49 |
|---|---|
| 63 Titles = factor(Titles, levels=Titles) | 63 Titles = factor(Titles, levels=Titles) |
| 64 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles)) | 64 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles)) |
| 65 | 65 |
| 66 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0)) | 66 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0)) |
| 67 | 67 |
| | 68 patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory... |
| | 69 patient.merge.list.second = list() |
| | 70 |
| 68 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){ | 71 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){ |
| 69 if (!is.data.frame(x) & is.list(x)){ | 72 if (!is.data.frame(x) & is.list(x)){ |
| 70 x = x[[1]] | 73 x = x[[1]] |
| 71 } | 74 } |
| 72 #x$Sample = factor(x$Sample, levels=unique(x$Sample)) | 75 #x$Sample = factor(x$Sample, levels=unique(x$Sample)) |
| 108 switched = T | 111 switched = T |
| 109 } | 112 } |
| 110 if(appendtxt){ | 113 if(appendtxt){ |
| 111 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3) | 114 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3) |
| 112 } | 115 } |
| 113 cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T) | 116 cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T) |
| 114 | 117 |
| 115 if(mergeOn == "Clone_Sequence"){ | 118 if(mergeOn == "Clone_Sequence"){ |
| 116 patient1$merge = paste(patient1$Clone_Sequence) | 119 patient1$merge = paste(patient1$Clone_Sequence) |
| 117 patient2$merge = paste(patient2$Clone_Sequence) | 120 patient2$merge = paste(patient2$Clone_Sequence) |
| 118 } else { | 121 } else { |
| 129 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy' | 132 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge alles 'fuzzy' |
| 130 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh | 133 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh |
| 131 | 134 |
| 132 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence | 135 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence |
| 133 | 136 |
| 134 | 137 start.time = proc.time() |
| 135 #fuzzy matching here... | 138 merge.list = c() |
| 136 if(mergeOn == "Clone_Sequence"){ | 139 |
| | 140 if(patient %in% names(patient.merge.list)){ |
| | 141 patientMerge = patient.merge.list[[patient]] |
| | 142 merge.list[["second"]] = patient.merge.list.second[[patient]] |
| | 143 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T) |
| | 144 |
| | 145 print(names(patient.merge.list)) |
| | 146 } else { |
| | 147 #fuzzy matching here... |
| 137 #merge.list = patientMerge$merge | 148 #merge.list = patientMerge$merge |
| 138 | 149 |
| 139 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),] | 150 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),] |
| 140 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),] | 151 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),] |
| 141 | 152 |
| 142 patient1.fuzzy = patient1 | 153 patient1.fuzzy = patient1 |
| 143 patient2.fuzzy = patient2 | 154 patient2.fuzzy = patient2 |
| 144 | 155 |
| 145 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence) | 156 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence) |
| 146 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) | 157 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence) |
| 147 | 158 |
| 148 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) | 159 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) |
| 149 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) | 160 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) |
| 150 | 161 |
| 151 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J) | 162 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J) |
| 152 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J) | 163 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J) |
| 153 | 164 |
| 154 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove? | 165 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove? |
| 155 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] | 166 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] |
| 156 | 167 |
| 157 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] | 168 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] |
| 158 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] | 169 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] |
| 159 | 170 |
| 160 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy) | 171 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy) |
| 161 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),] | 172 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),] |
| 162 | 173 |
| 163 merge.list = list() | 174 merge.list = list() |
| 164 | 175 |
| 168 while(nrow(patient.fuzzy) > 1){ | 179 while(nrow(patient.fuzzy) > 1){ |
| 169 first.merge = patient.fuzzy[1,"merge"] | 180 first.merge = patient.fuzzy[1,"merge"] |
| 170 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] | 181 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] |
| 171 first.sample = patient.fuzzy[1,"Sample"] | 182 first.sample = patient.fuzzy[1,"Sample"] |
| 172 merge.filter = first.merge == patient.fuzzy$merge | 183 merge.filter = first.merge == patient.fuzzy$merge |
| 173 | 184 |
| 174 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9 | 185 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9 |
| 175 | 186 |
| 176 first.sample.filter = first.sample == patient.fuzzy$Sample | 187 first.sample.filter = first.sample == patient.fuzzy$Sample |
| 177 second.sample.filter = first.sample != patient.fuzzy$Sample | 188 second.sample.filter = first.sample != patient.fuzzy$Sample |
| 178 | 189 |
| 179 #first match same sample, sum to a single row, same for other sample | 190 #first match same sample, sum to a single row, same for other sample |
| 180 #then merge rows like 'normal' | 191 #then merge rows like 'normal' |
| 181 | 192 |
| 182 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence) | 193 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence) |
| 183 | 194 |
| 184 | 195 |
| 185 | 196 |
| 186 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter | 197 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter |
| 264 | 275 |
| 265 } else { | 276 } else { |
| 266 patient.fuzzy = patient.fuzzy[-1,] | 277 patient.fuzzy = patient.fuzzy[-1,] |
| 267 } | 278 } |
| 268 } | 279 } |
| 269 | 280 patient.merge.list[[patient]] <<- patientMerge |
| 270 } | 281 patient.merge.list.second[[patient]] <<- merge.list[["second"]] |
| 271 | 282 cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T) |
| | 283 } |
| | 284 |
| | 285 print(names(patient.merge.list)) |
| | 286 |
| 272 | 287 |
| 273 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) | 288 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony]) |
| 274 res1 = vector() | 289 res1 = vector() |
| 275 res2 = vector() | 290 res2 = vector() |
| 276 resBoth = vector() | 291 resBoth = vector() |
