Mercurial > repos > davidvanzessen > report_clonality_igg
comparison RScript.r @ 15:c6d0ee9b3d91 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Thu, 04 Dec 2014 10:53:23 -0500 |
| parents | 8002401b83c4 |
| children | ff84987df4f8 |
comparison
equal
deleted
inserted
replaced
| 14:8002401b83c4 | 15:c6d0ee9b3d91 |
|---|---|
| 84 sampleFile <- file("samples.txt") | 84 sampleFile <- file("samples.txt") |
| 85 un = unique(inputdata$Sample) | 85 un = unique(inputdata$Sample) |
| 86 un = paste(un, sep="\n") | 86 un = paste(un, sep="\n") |
| 87 writeLines(un, sampleFile) | 87 writeLines(un, sampleFile) |
| 88 close(sampleFile) | 88 close(sampleFile) |
| 89 | |
| 90 # ---------------------- Counting the productive/unproductive and unique sequences ---------------------- | |
| 91 | |
| 92 inputdata.dt = data.table(inputdata) #for speed | |
| 93 | |
| 94 ct = unlist(strsplit(clonaltype, ",")) | |
| 95 if(clonaltype == "none"){ | |
| 96 ct = c("ID") | |
| 97 } | |
| 98 | |
| 99 inputdata.dt$samples_replicates = paste(inputdata.dt$Sample, inputdata.dt$Replicate, sep="_") | |
| 100 samples_replicates = c(unique(inputdata.dt$samples_replicates), unique(as.character(inputdata.dt$Sample))) | |
| 101 frequency_table = data.frame(ID = samples_replicates[order(samples_replicates)]) | |
| 102 | |
| 103 | |
| 104 sample_productive_count = inputdata.dt[, list(All=.N, | |
| 105 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), | |
| 106 perc_prod = 1, | |
| 107 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), | |
| 108 perc_prod_un = 1, | |
| 109 Unproductive= nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",]), | |
| 110 perc_unprod = 1, | |
| 111 Unproductive_unique =nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",list(count=.N),by=ct]), | |
| 112 perc_unprod_un = 1), | |
| 113 by=c("Sample")] | |
| 114 | |
| 115 sample_productive_count$perc_prod = round(sample_productive_count$Productive / sample_productive_count$All * 100) | |
| 116 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100) | |
| 117 | |
| 118 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100) | |
| 119 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100) | |
| 120 | |
| 121 | |
| 122 sample_replicate_productive_count = inputdata.dt[, list(All=.N, | |
| 123 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), | |
| 124 perc_prod = 1, | |
| 125 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), | |
| 126 perc_prod_un = 1, | |
| 127 Unproductive= nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",]), | |
| 128 perc_unprod = 1, | |
| 129 Unproductive_unique =nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",list(count=.N),by=ct]), | |
| 130 perc_unprod_un = 1), | |
| 131 by=c("samples_replicates")] | |
| 132 | |
| 133 sample_replicate_productive_count$perc_prod = round(sample_replicate_productive_count$Productive / sample_replicate_productive_count$All * 100) | |
| 134 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100) | |
| 135 | |
| 136 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100) | |
| 137 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100) | |
| 138 | |
| 139 setnames(sample_replicate_productive_count, colnames(sample_productive_count)) | |
| 140 | |
| 141 counts = rbind(sample_replicate_productive_count, sample_productive_count) | |
| 142 counts = counts[order(counts$Sample),] | |
| 143 | |
| 144 write.table(x=counts, file="productive_counting.txt", sep=",",quote=F,row.names=F,col.names=F) | |
| 89 | 145 |
| 90 # ---------------------- Frequency calculation for V, D and J ---------------------- | 146 # ---------------------- Frequency calculation for V, D and J ---------------------- |
| 91 | 147 |
| 92 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")]) | 148 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")]) |
| 93 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 149 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) |
