Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
comparison RScript.r @ 11:bc4612998d50 draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 06 Oct 2014 05:57:55 -0400 |
parents | 974febc99fd4 |
children | eb5b569b44dd |
comparison
equal
deleted
inserted
replaced
10:974febc99fd4 | 11:bc4612998d50 |
---|---|
23 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1))) | 23 dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1))) |
24 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1))) | 24 dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1))) |
25 | 25 |
26 str(dat) | 26 str(dat) |
27 cat("<tr><td>Deduplication</td></tr>", file=logfile, append=T) | 27 cat("<tr><td>Deduplication</td></tr>", file=logfile, append=T) |
28 dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")]) | 28 #dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")]) |
| 29 |
| 30 most.common = function(x){ |
| 31 ux = unique(x) |
| 32 if(length(ux) > 1){ |
| 33 xtdf = data.frame(table(x)) |
| 34 return(xtdf$Var1[which.max(xtdf$Freq)]) |
| 35 #print(xtdf) |
| 36 } |
| 37 return(unique(x)) |
| 38 } |
| 39 |
| 40 dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), V_Segment_Major_Gene=most.common(.SD$V_Segment_Major_Gene), J_Segment_Major_Gene=most.common(.SD$J_Segment_Major_Gene), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "CDR3_Sense_Sequence")]) |
29 | 41 |
30 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T) | 42 cat("<tr><td>Calculating Frequency</td></tr>", file=logfile, append=T) |
31 dat$Frequency = ((10^dat$Log10_Frequency)*100) | 43 dat$Frequency = ((10^dat$Log10_Frequency)*100) |
32 | 44 |
33 dat = dat[dat$Frequency >= min_freq,] | 45 dat = dat[dat$Frequency >= min_freq,] |