# HG changeset patch
# User davidvanzessen
# Date 1412589475 14400
# Node ID bc4612998d50dd34d79bc9b615273701f8988ddb
# Parent 974febc99fd406591f3949a5333a70a43eaf683d
Uploaded
diff -r 974febc99fd4 -r bc4612998d50 RScript.r
--- a/RScript.r Wed Oct 01 08:11:47 2014 -0400
+++ b/RScript.r Mon Oct 06 05:57:55 2014 -0400
@@ -25,7 +25,19 @@
str(dat)
cat("
Deduplication |
", file=logfile, append=T)
-dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")])
+#dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")])
+
+# Most frequent value of x (NA ignored; ties go to the first value in sort order), returned with the same type as x.
+most.common = function(x){
+  ux = unique(x)
+  if(length(ux) > 1){
+    counts = table(x) # labels are read via names(): data.frame(table(x)) would call that column "x", not "Var1"
+    return(ux[match(names(counts)[which.max(counts)], as.character(ux))]) # index into ux so the result is never a factor of labels
+  }
+  return(ux)
+}
+
+dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), V_Segment_Major_Gene=most.common(.SD$V_Segment_Major_Gene), J_Segment_Major_Gene=most.common(.SD$J_Segment_Major_Gene), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "CDR3_Sense_Sequence")]) # Deduplicate rows grouped by Sample/Cell_Count/CDR3_Sense_Sequence: V and J gene reduced with most.common(), counts and Log10_Frequency summed, leukemia flag combined with any(). NOTE(review): sum() over Log10_Frequency adds logarithms - confirm this is intended; presumably each group has a single Patient, otherwise unique() would give several rows per group - verify.
cat("Calculating Frequency |
", file=logfile, append=T)
dat$Frequency = ((10^dat$Log10_Frequency)*100)