clonal_sequences_in_paired_samples: RScript.r comparison

comparison RScript.r @ 51:17e677c72e49 draft

Uploaded

author	davidvanzessen
date	Fri, 09 Oct 2015 06:58:17 -0400
parents	7dd7cefcf72d
children	c5c2a790d476

comparison

equal deleted inserted replaced

-:7dd7cefcf72d
+:17e677c72e49
 Titles = factor(Titles, levels=Titles)
 TitlesOrder = data.frame("Title"=Titles, "TitlesOrder"=1:length(Titles))
 single_patients = data.frame("Patient" = character(0),"Sample" = character(0), "on" = character(0), "Clone_Sequence" = character(0), "Frequency" = numeric(0), "normalized_read_count" = numeric(0), "V_Segment_Major_Gene" = character(0), "J_Segment_Major_Gene" = character(0), "Rearrangement" = character(0))
+patient.merge.list = list() #cache the 'both' table, 2x speedup for more memory...
+patient.merge.list.second = list()
 patientCountOnColumn <- function(x, product, interval, on, appendtxt=F){
 if (!is.data.frame(x) & is.list(x)){
 x = x[[1]]
 }
 #x$Sample = factor(x$Sample, levels=unique(x$Sample))
 switched = T
 }
 if(appendtxt){
 cat(paste(patient, oneSample, twoSample, type, sep="\t"), file="patients.txt", append=T, sep="", fill=3)
 }
-cat(paste("<tr><td>", patient, "</td></tr>", sep=""), file=logfile, append=T)
+cat(paste("<tr><td>", patient, "</td>", sep=""), file=logfile, append=T)
 if(mergeOn == "Clone_Sequence"){
 patient1$merge = paste(patient1$Clone_Sequence)
 patient2$merge = paste(patient2$Clone_Sequence)
 } else {
 #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") #merge everything 'fuzzy'
 patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")[NULL,] #blegh
 cs.exact.matches = patient1[patient1$Clone_Sequence %in% patient2$Clone_Sequence,]$Clone_Sequence
+start.time = proc.time()
-#fuzzy matching here...
+merge.list = c()
-if(mergeOn == "Clone_Sequence"){
+if(patient %in% names(patient.merge.list)){
+patientMerge = patient.merge.list[[patient]]
+merge.list[["second"]] = patient.merge.list.second[[patient]]
+cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (fetched from cache)</td></tr>", sep=""), file=logfile, append=T)
+print(names(patient.merge.list))
+} else {
+#fuzzy matching here...
 #merge.list = patientMerge$merge
 #patient1.fuzzy = patient1[!(patient1$merge %in% merge.list),]
 #patient2.fuzzy = patient2[!(patient2$merge %in% merge.list),]
 patient1.fuzzy = patient1
 patient2.fuzzy = patient2
 #patient1.fuzzy$merge = paste(patient1.fuzzy$V_Segment_Major_Gene, patient1.fuzzy$J_Segment_Major_Gene, patient1.fuzzy$CDR3_Sense_Sequence)
 #patient2.fuzzy$merge = paste(patient2.fuzzy$V_Segment_Major_Gene, patient2.fuzzy$J_Segment_Major_Gene, patient2.fuzzy$CDR3_Sense_Sequence)
 #patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence)
 #patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence)
 patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J)
 patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J)
 #merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) #also remove?
 #merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,]
 #patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
 #patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,]
 patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy)
 patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),]
 merge.list = list()
 while(nrow(patient.fuzzy) > 1){
 first.merge = patient.fuzzy[1,"merge"]
 first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"]
 first.sample = patient.fuzzy[1,"Sample"]
 merge.filter = first.merge == patient.fuzzy$merge
 #length.filter = nchar(patient.fuzzy$Clone_Sequence) - nchar(first.clone.sequence) <= 9
 first.sample.filter = first.sample == patient.fuzzy$Sample
 second.sample.filter = first.sample != patient.fuzzy$Sample
 #first match within the same sample and sum to a single row; do the same for the other sample
 #then merge the rows like a 'normal' merge
 sequence.filter = grepl(paste("^", first.clone.sequence, sep=""), patient.fuzzy$Clone_Sequence)
 #match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) & length.filter & sample.filter
 } else {
 patient.fuzzy = patient.fuzzy[-1,]
 }
 }
+patient.merge.list[[patient]] <<- patientMerge
-}
+patient.merge.list.second[[patient]] <<- merge.list[["second"]]
+cat(paste("<td>", nrow(patient1), " in ", oneSample, " and ", nrow(patient2), " in ", twoSample, ", ", nrow(patientMerge), " in both (finding both took ", (proc.time() - start.time)[[3]], "s)</td></tr>", sep=""), file=logfile, append=T)
+}
+print(names(patient.merge.list))
 patientMerge$thresholdValue = pmax(patientMerge[,onx], patientMerge[,ony])
 res1 = vector()
 res2 = vector()
 resBoth = vector()

Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples

comparison RScript.r @ 51:17e677c72e49 draft