diff RScript.r @ 15:c6d0ee9b3d91 draft

Uploaded
author davidvanzessen
date Thu, 04 Dec 2014 10:53:23 -0500
parents 8002401b83c4
children ff84987df4f8
line wrap: on
line diff
--- a/RScript.r	Fri Nov 14 04:15:43 2014 -0500
+++ b/RScript.r	Thu Dec 04 10:53:23 2014 -0500
@@ -87,6 +87,62 @@
 writeLines(un, sampleFile)
 close(sampleFile)
 
+# ---------------------- Counting the productive/unproductive and unique sequences ----------------------
+# Counts productive and unproductive rows per sample and per sample/replicate pair and writes them to productive_counting.txt.
+inputdata.dt = data.table(inputdata) #for speed
+# ct holds the grouping columns used for the "unique" counts: the comma-separated names in clonaltype, or just "ID" when clonaltype is "none".
+ct = unlist(strsplit(clonaltype, ","))
+if(clonaltype == "none"){ # NOTE(review): assumes clonaltype is a single string - confirm against where it is set
+	ct = c("ID")
+}
+# Composite key "<Sample>_<Replicate>" identifying each sample/replicate pair.
+inputdata.dt$samples_replicates = paste(inputdata.dt$Sample, inputdata.dt$Replicate, sep="_")
+samples_replicates = c(unique(inputdata.dt$samples_replicates), unique(as.character(inputdata.dt$Sample))) # replicate keys followed by the plain sample names
+frequency_table = data.frame(ID = samples_replicates[order(samples_replicates)]) # sorted IDs; not referenced again in the visible part of this section
+# Per-sample summary: total rows (All), productive and unproductive row counts, and the number of distinct ct combinations within each of the two.
+# "Productive" means Functionality is exactly "productive" or "productive (see comment)"; the perc_* columns start as the placeholder 1 and are filled in below.
+sample_productive_count = inputdata.dt[, list(All=.N, 
+                                              Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), 
+                                              perc_prod = 1,
+                                              Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), # one row per ct combination among productive rows
+                                              perc_prod_un = 1,
+                                              Unproductive= nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",]),
+                                              perc_unprod = 1,
+                                              Unproductive_unique =nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",list(count=.N),by=ct]), # one row per ct combination among unproductive rows
+                                              perc_unprod_un = 1),
+                                       by=c("Sample")]
+# Percentages are relative to All (every row of the sample) and rounded to whole numbers; the "unique" percentages also divide by All.
+sample_productive_count$perc_prod = round(sample_productive_count$Productive / sample_productive_count$All * 100)
+sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100)
+# Same calculation for the unproductive counts.
+sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100)
+sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100)
+# Same summary as sample_productive_count, but grouped by the samples_replicates key instead of Sample.
+# The columns are listed in the same order as in sample_productive_count; the two tables are stacked further down.
+sample_replicate_productive_count = inputdata.dt[, list(All=.N, 
+                                                        Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), 
+                                                        perc_prod = 1,
+                                                        Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), # one row per ct combination among productive rows
+                                                        perc_prod_un = 1,
+                                                        Unproductive= nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",]),
+                                                        perc_unprod = 1,
+                                                        Unproductive_unique =nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",list(count=.N),by=ct]), # one row per ct combination among unproductive rows
+                                                        perc_unprod_un = 1),
+                                                 by=c("samples_replicates")]
+# Percentages are relative to All (every row of the sample/replicate pair) and rounded to whole numbers.
+sample_replicate_productive_count$perc_prod = round(sample_replicate_productive_count$Productive / sample_replicate_productive_count$All * 100)
+sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100)
+# Same calculation for the unproductive counts.
+sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100)
+sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100)
+# Rename the replicate table's columns positionally to those of sample_productive_count (its first column, samples_replicates, becomes Sample).
+setnames(sample_replicate_productive_count, colnames(sample_productive_count))
+# Stack the replicate-level rows on top of the sample-level rows, then sort by the Sample column.
+counts = rbind(sample_replicate_productive_count, sample_productive_count)
+counts = counts[order(counts$Sample),]
+# Written comma-separated without quotes, row names or a header line.
+write.table(x=counts, file="productive_counting.txt", sep=",",quote=F,row.names=F,col.names=F)
+
 # ---------------------- Frequency calculation for V, D and J ----------------------
 
 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")])