# HG changeset patch
# User davidvanzessen
# Date 1412589475 14400
# Node ID bc4612998d50dd34d79bc9b615273701f8988ddb
# Parent  974febc99fd406591f3949a5333a70a43eaf683d
Uploaded

diff -r 974febc99fd4 -r bc4612998d50 RScript.r
--- a/RScript.r	Wed Oct 01 08:11:47 2014 -0400
+++ b/RScript.r	Mon Oct 06 05:57:55 2014 -0400
@@ -25,7 +25,27 @@
 str(dat)
 
 cat("Deduplication", file=logfile, append=T)
-dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")])
+#dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")])
+
+# Return the most frequent value in x, with the same type as x.
+# NA entries are not counted; ties go to the first value in sorted order,
+# which is the order table() uses for its levels.
+# Values are picked from x itself rather than data.frame(table(x))$Var1:
+# that column is a factor, so a character x gave a factor for mixed groups
+# but a character for uniform ones, and data.table needs one type per column.
+most.common = function(x){
+  ux = unique(x)
+  if(length(ux) > 1){
+    vals = sort(ux) # sort() drops NA, so NA is never a candidate
+    counts = tabulate(match(x, vals), nbins=length(vals))
+    return(vals[which.max(counts)])
+  }
+  return(ux)
+}
+
+# Merge rows sharing Sample, Cell_Count and CDR3_Sense_Sequence: numeric columns
+# are summed and each merged row gets the most common V/J gene of its group.
+dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), V_Segment_Major_Gene=most.common(.SD$V_Segment_Major_Gene), J_Segment_Major_Gene=most.common(.SD$J_Segment_Major_Gene), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "CDR3_Sense_Sequence")])
 
 cat("Calculating Frequency", file=logfile, append=T)
 dat$Frequency = ((10^dat$Log10_Frequency)*100)