Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
diff RScript.r @ 11:bc4612998d50 draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 06 Oct 2014 05:57:55 -0400 |
parents | 974febc99fd4 |
children | eb5b569b44dd |
line wrap: on
line diff
--- a/RScript.r	Wed Oct 01 08:11:47 2014 -0400
+++ b/RScript.r	Mon Oct 06 05:57:55 2014 -0400
@@ -25,7 +25,26 @@
 str(dat)
 
 cat("<tr><td>Deduplication</td></tr>", file=logfile, append=T)
-dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")])
+# Earlier deduplication, kept for reference: it also grouped on the V and J gene columns.
+#dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")])
+
+# Return the most frequent value of x as a length-one vector of the same type as x.
+# Ties go to the value that appears first in x. A vector with at most one distinct
+# value is returned as unique(x).
+# Occurrences are counted by position in unique(x) rather than via data.frame(table(x)),
+# whose value column is named after the tabulated symbol ("x"), not "Var1".
+most.common = function(x){
+  ux = unique(x)
+  if(length(ux) > 1){
+    counts = tabulate(match(x, ux), nbins=length(ux))
+    return(ux[which.max(counts)])
+  }
+  return(ux)
+}
+
+# Merge rows sharing Sample, Cell_Count and CDR3_Sense_Sequence: keep the most common V and J gene, sum the count columns, and mark the clone as leukemia-related if any merged row was.
+# NOTE(review): Log10_Frequency values are summed in log space here - confirm this is intended.
+dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), V_Segment_Major_Gene=most.common(.SD$V_Segment_Major_Gene), J_Segment_Major_Gene=most.common(.SD$J_Segment_Major_Gene), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "CDR3_Sense_Sequence")])
 
 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T)
 dat$Frequency = ((10^dat$Log10_Frequency)*100)