Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 54:5ba0377b7737 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Fri, 29 Jan 2016 08:10:21 -0500 |
| parents | 379856bef228 |
| children | 67627d77d63b |
comparison
equal
deleted
inserted
replaced
| 53:379856bef228 | 54:5ba0377b7737 |
|---|---|
| 39 ct = unlist(strsplit(clonaltype, ",")) | 39 ct = unlist(strsplit(clonaltype, ",")) |
| 40 species = args[5] #human or mouse | 40 species = args[5] #human or mouse |
| 41 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD | 41 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD |
| 42 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no) | 42 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no) |
| 43 clonality_method = args[8] | 43 clonality_method = args[8] |
| 44 filter_uniques = args[9] | 44 |
| 45 | |
| 46 print(paste("filter_uniques", filter_uniques)) | |
| 47 | 45 |
| 48 # ---------------------- Data preperation ---------------------- | 46 # ---------------------- Data preperation ---------------------- |
| 49 | 47 |
| 50 print("Report Clonality - Data preperation") | 48 print("Report Clonality - Data preperation") |
| 51 | 49 |
| 61 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) | 59 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) |
| 62 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) | 60 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) |
| 63 | 61 |
| 64 #filter uniques | 62 #filter uniques |
| 65 inputdata.removed = inputdata[NULL,] | 63 inputdata.removed = inputdata[NULL,] |
| 66 | |
| 67 filter_uniques = filter_uniques == "yes" && c("CDR1.Seq", "CDR2.Seq", "CDR3.Seq", "FR1.IMGT", "FR2.IMGT", "FR3.IMGT") %in% names(inputdata) | |
| 68 | |
| 69 print(paste("filter_uniques", filter_uniques)) | |
| 70 | |
| 71 if(filter_uniques){ | |
| 72 | |
| 73 clmns = names(inputdata) | |
| 74 | |
| 75 inputdata$unique.def = paste(inputdata$CDR1.Seq, inputdata$CDR2.Seq, inputdata$CDR3.Seq, inputdata$FR1.IMGT, inputdata$FR2.IMGT, inputdata$FR3.IMGT) | |
| 76 inputdata.filtered = inputdata[duplicated(inputdata$unique.def),] | |
| 77 fltr = inputdata$unique.def %in% inputdata.filtered$unique.def | |
| 78 | |
| 79 inputdata.removed = inputdata[!fltr,] | |
| 80 inputdata.removed$samples_replicates = paste(inputdata.removed$Sample, inputdata.removed$Replicate, sep="_") | |
| 81 | |
| 82 inputdata = inputdata[fltr,] | |
| 83 | |
| 84 inputdata = inputdata[,clmns] | |
| 85 | |
| 86 write.table(inputdata.removed, "unique_removed.csv", sep=",",quote=F,row.names=F,col.names=T) | |
| 87 } | |
| 88 | |
| 89 | 64 |
| 90 inputdata$clonaltype = 1:nrow(inputdata) | 65 inputdata$clonaltype = 1:nrow(inputdata) |
| 91 | 66 |
| 92 PRODF = inputdata | 67 PRODF = inputdata |
| 93 UNPROD = inputdata | 68 UNPROD = inputdata |
| 181 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100) | 156 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100) |
| 182 | 157 |
| 183 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100) | 158 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100) |
| 184 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100) | 159 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100) |
| 185 | 160 |
| 186 | |
| 187 if(filter_uniques){ | |
| 188 inputdata.removed.s = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("Sample")] | |
| 189 | |
| 190 sample_productive_count = merge(sample_productive_count, inputdata.removed.s, by="Sample") | |
| 191 | |
| 192 sample_productive_count$perc_rem = round(sample_productive_count$UniqueRemoved / sample_productive_count$All * 100) | |
| 193 } else { | |
| 194 sample_productive_count$UniqueRemoved = 0 | |
| 195 sample_productive_count$perc_rem = 0 | |
| 196 } | |
| 197 | |
| 198 sample_replicate_productive_count = inputdata.dt[, list(All=.N, | 161 sample_replicate_productive_count = inputdata.dt[, list(All=.N, |
| 199 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), | 162 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), |
| 200 perc_prod = 1, | 163 perc_prod = 1, |
| 201 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), | 164 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), |
| 202 perc_prod_un = 1, | 165 perc_prod_un = 1, |
| 209 sample_replicate_productive_count$perc_prod = round(sample_replicate_productive_count$Productive / sample_replicate_productive_count$All * 100) | 172 sample_replicate_productive_count$perc_prod = round(sample_replicate_productive_count$Productive / sample_replicate_productive_count$All * 100) |
| 210 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100) | 173 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100) |
| 211 | 174 |
| 212 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100) | 175 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100) |
| 213 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100) | 176 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100) |
| 214 | |
| 215 | |
| 216 if(filter_uniques){ | |
| 217 inputdata.removed.sr = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("samples_replicates")] | |
| 218 | |
| 219 sample_replicate_productive_count = merge(sample_replicate_productive_count, inputdata.removed.sr, by="samples_replicates") | |
| 220 | |
| 221 sample_replicate_productive_count$perc_rem = round(sample_replicate_productive_count$UniqueRemoved / sample_productive_count$All * 100) | |
| 222 } else { | |
| 223 sample_replicate_productive_count$UniqueRemoved = 0 | |
| 224 sample_replicate_productive_count$perc_rem = 0 | |
| 225 } | |
| 226 | 177 |
| 227 setnames(sample_replicate_productive_count, colnames(sample_productive_count)) | 178 setnames(sample_replicate_productive_count, colnames(sample_productive_count)) |
| 228 | 179 |
| 229 counts = rbind(sample_replicate_productive_count, sample_productive_count) | 180 counts = rbind(sample_replicate_productive_count, sample_productive_count) |
| 230 counts = counts[order(counts$Sample),] | 181 counts = counts[order(counts$Sample),] |
