comparison report_clonality/RScript.r @ 54:5ba0377b7737 draft

Uploaded
author davidvanzessen
date Fri, 29 Jan 2016 08:10:21 -0500
parents 379856bef228
children 67627d77d63b
comparison
equal deleted inserted replaced
53:379856bef228 54:5ba0377b7737
39 ct = unlist(strsplit(clonaltype, ",")) 39 ct = unlist(strsplit(clonaltype, ","))
40 species = args[5] #human or mouse 40 species = args[5] #human or mouse
41 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD 41 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD
42 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no) 42 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no)
43 clonality_method = args[8] 43 clonality_method = args[8]
44 filter_uniques = args[9] 44
45
46 print(paste("filter_uniques", filter_uniques))
47 45
48 # ---------------------- Data preperation ---------------------- 46 # ---------------------- Data preperation ----------------------
49 47
50 print("Report Clonality - Data preperation") 48 print("Report Clonality - Data preperation")
51 49
61 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) 59 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene)
62 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) 60 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene)
63 61
64 #filter uniques 62 #filter uniques
65 inputdata.removed = inputdata[NULL,] 63 inputdata.removed = inputdata[NULL,]
66
67 filter_uniques = filter_uniques == "yes" && c("CDR1.Seq", "CDR2.Seq", "CDR3.Seq", "FR1.IMGT", "FR2.IMGT", "FR3.IMGT") %in% names(inputdata)
68
69 print(paste("filter_uniques", filter_uniques))
70
71 if(filter_uniques){
72
73 clmns = names(inputdata)
74
75 inputdata$unique.def = paste(inputdata$CDR1.Seq, inputdata$CDR2.Seq, inputdata$CDR3.Seq, inputdata$FR1.IMGT, inputdata$FR2.IMGT, inputdata$FR3.IMGT)
76 inputdata.filtered = inputdata[duplicated(inputdata$unique.def),]
77 fltr = inputdata$unique.def %in% inputdata.filtered$unique.def
78
79 inputdata.removed = inputdata[!fltr,]
80 inputdata.removed$samples_replicates = paste(inputdata.removed$Sample, inputdata.removed$Replicate, sep="_")
81
82 inputdata = inputdata[fltr,]
83
84 inputdata = inputdata[,clmns]
85
86 write.table(inputdata.removed, "unique_removed.csv", sep=",",quote=F,row.names=F,col.names=T)
87 }
88
89 64
90 inputdata$clonaltype = 1:nrow(inputdata) 65 inputdata$clonaltype = 1:nrow(inputdata)
91 66
92 PRODF = inputdata 67 PRODF = inputdata
93 UNPROD = inputdata 68 UNPROD = inputdata
181 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100) 156 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100)
182 157
183 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100) 158 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100)
184 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100) 159 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100)
185 160
186
187 if(filter_uniques){
188 inputdata.removed.s = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("Sample")]
189
190 sample_productive_count = merge(sample_productive_count, inputdata.removed.s, by="Sample")
191
192 sample_productive_count$perc_rem = round(sample_productive_count$UniqueRemoved / sample_productive_count$All * 100)
193 } else {
194 sample_productive_count$UniqueRemoved = 0
195 sample_productive_count$perc_rem = 0
196 }
197
198 sample_replicate_productive_count = inputdata.dt[, list(All=.N, 161 sample_replicate_productive_count = inputdata.dt[, list(All=.N,
199 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), 162 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]),
200 perc_prod = 1, 163 perc_prod = 1,
201 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), 164 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]),
202 perc_prod_un = 1, 165 perc_prod_un = 1,
209 sample_replicate_productive_count$perc_prod = round(sample_replicate_productive_count$Productive / sample_replicate_productive_count$All * 100) 172 sample_replicate_productive_count$perc_prod = round(sample_replicate_productive_count$Productive / sample_replicate_productive_count$All * 100)
210 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100) 173 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100)
211 174
212 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100) 175 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100)
213 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100) 176 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100)
214
215
216 if(filter_uniques){
217 inputdata.removed.sr = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("samples_replicates")]
218
219 sample_replicate_productive_count = merge(sample_replicate_productive_count, inputdata.removed.sr, by="samples_replicates")
220
221 sample_replicate_productive_count$perc_rem = round(sample_replicate_productive_count$UniqueRemoved / sample_productive_count$All * 100)
222 } else {
223 sample_replicate_productive_count$UniqueRemoved = 0
224 sample_replicate_productive_count$perc_rem = 0
225 }
226 177
227 setnames(sample_replicate_productive_count, colnames(sample_productive_count)) 178 setnames(sample_replicate_productive_count, colnames(sample_productive_count))
228 179
229 counts = rbind(sample_replicate_productive_count, sample_productive_count) 180 counts = rbind(sample_replicate_productive_count, sample_productive_count)
230 counts = counts[order(counts$Sample),] 181 counts = counts[order(counts$Sample),]