Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 47:d97e1421aa86 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Wed, 27 Jan 2016 10:25:43 -0500 |
| parents | 2e0a7c35082e |
| children | d08dfc8d5225 |
comparison
equal
deleted
inserted
replaced
| 46:fee06348bfad | 47:d97e1421aa86 |
|---|---|
| 39 ct = unlist(strsplit(clonaltype, ",")) | 39 ct = unlist(strsplit(clonaltype, ",")) |
| 40 species = args[5] #human or mouse | 40 species = args[5] #human or mouse |
| 41 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD | 41 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD |
| 42 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no) | 42 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no) |
| 43 clonality_method = args[8] | 43 clonality_method = args[8] |
| 44 filter_uniques = args[9] | |
| 44 | 45 |
| 45 # ---------------------- Data preperation ---------------------- | 46 # ---------------------- Data preperation ---------------------- |
| 46 | 47 |
| 47 print("Report Clonality - Data preperation") | 48 print("Report Clonality - Data preperation") |
| 48 | 49 |
| 55 | 56 |
| 56 #remove the allele from the V,D and J genes | 57 #remove the allele from the V,D and J genes |
| 57 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) | 58 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) |
| 58 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) | 59 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) |
| 59 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) | 60 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) |
| 61 | |
| 62 #filter uniques | |
| 63 inputdata.removed = inputdata[NULL,] | |
| 64 | |
| 65 if(filter_uniques == "yes" && c("CDR1.Seq", "CDR2.Seq", "CDR3.Seq", "FR1.IMGT", "FR2.IMGT", "FR3.IMGT") %in% names(inputdata)){ | |
| 66 | |
| 67 clmns = names(inputdata) | |
| 68 | |
| 69 inputdata$unique.def = paste(inputdata$CDR1.Seq, inputdata$CDR2.Seq, inputdata$CDR3.Seq, inputdata$FR1.IMGT, inputdata$FR2.IMGT, inputdata$FR3.IMGT) | |
| 70 inputdata.filtered = inputdata[duplicated(inputdata$unique.def),] | |
| 71 fltr = inputdata$unique.def %in% inputdata.filtered$unique.def | |
| 72 | |
| 73 inputdata.removed = inputdata[!fltr,] | |
| 74 inputdata.removed$samples_replicates = paste(inputdata.removed$Sample, inputdata.removed$Replicate, sep="_") | |
| 75 | |
| 76 inputdata = inputdata[fltr,] | |
| 77 | |
| 78 inputdata = inputdata[,clmns] | |
| 79 | |
| 80 write.table(inputdata.removed, "unique_removed.csv", sep=",",quote=F,row.names=F,col.names=T) | |
| 81 } | |
| 82 | |
| 60 | 83 |
| 61 inputdata$clonaltype = 1:nrow(inputdata) | 84 inputdata$clonaltype = 1:nrow(inputdata) |
| 62 | 85 |
| 63 PRODF = inputdata | 86 PRODF = inputdata |
| 64 UNPROD = inputdata | 87 UNPROD = inputdata |
| 152 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100) | 175 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100) |
| 153 | 176 |
| 154 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100) | 177 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100) |
| 155 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100) | 178 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100) |
| 156 | 179 |
| 180 inputdata.removed.s = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("Sample")] | |
| 181 | |
| 182 sample_productive_count = merge(sample_productive_count, inputdata.removed.s, by="Sample") | |
| 183 | |
| 184 sample_productive_count$perc_rem = round(sample_productive_count$UniqueRemoved / sample_productive_count$All * 100) | |
| 185 | |
| 157 | 186 |
| 158 sample_replicate_productive_count = inputdata.dt[, list(All=.N, | 187 sample_replicate_productive_count = inputdata.dt[, list(All=.N, |
| 159 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), | 188 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), |
| 160 perc_prod = 1, | 189 perc_prod = 1, |
| 161 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), | 190 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), |
| 169 sample_replicate_productive_count$perc_prod = round(sample_replicate_productive_count$Productive / sample_replicate_productive_count$All * 100) | 198 sample_replicate_productive_count$perc_prod = round(sample_replicate_productive_count$Productive / sample_replicate_productive_count$All * 100) |
| 170 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100) | 199 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100) |
| 171 | 200 |
| 172 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100) | 201 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100) |
| 173 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100) | 202 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100) |
| 203 | |
| 204 inputdata.removed.sr = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("samples_replicates")] | |
| 205 | |
| 206 sample_replicate_productive_count = merge(sample_replicate_productive_count, inputdata.removed.sr, by="samples_replicates") | |
| 207 | |
| 208 sample_replicate_productive_count$perc_rem = round(sample_replicate_productive_count$UniqueRemoved / sample_productive_count$All * 100) | |
| 209 | |
| 174 | 210 |
| 175 setnames(sample_replicate_productive_count, colnames(sample_productive_count)) | 211 setnames(sample_replicate_productive_count, colnames(sample_productive_count)) |
| 176 | 212 |
| 177 counts = rbind(sample_replicate_productive_count, sample_productive_count) | 213 counts = rbind(sample_replicate_productive_count, sample_productive_count) |
| 178 counts = counts[order(counts$Sample),] | 214 counts = counts[order(counts$Sample),] |
