comparison RScript.r @ 11:bc4612998d50 draft

Uploaded
author davidvanzessen
date Mon, 06 Oct 2014 05:57:55 -0400
parents 974febc99fd4
children eb5b569b44dd
comparison
equal deleted inserted replaced
10:974febc99fd4 11:bc4612998d50
23 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1))) 23 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
24 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1))) 24 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
25 25
26 str(dat) 26 str(dat)
27 cat("<tr><td>Deduplication</td></tr>", file=logfile, append=T) 27 cat("<tr><td>Deduplication</td></tr>", file=logfile, append=T)
28 dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")]) 28 #dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")])
29
30 most.common = function(x){
31 ux = unique(x)
32 if(length(ux) > 1){
33 xtdf = data.frame(table(x))
34 return(xtdf$Var1[which.max(xtdf$Freq)])
35 #print(xtdf)
36 }
37 return(unique(x))
38 }
39
40 dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), V_Segment_Major_Gene=most.common(.SD$V_Segment_Major_Gene), J_Segment_Major_Gene=most.common(.SD$J_Segment_Major_Gene), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "CDR3_Sense_Sequence")])
29 41
30 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T) 42 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
31 dat$Frequency = ((10^dat$Log10_Frequency)*100) 43 dat$Frequency = ((10^dat$Log10_Frequency)*100)
32 44
33 dat = dat[dat$Frequency >= min_freq,] 45 dat = dat[dat$Frequency >= min_freq,]