Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 62:14ea4c464435 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Thu, 28 Apr 2016 05:24:14 -0400 |
| parents | 11ec9edfefee |
| children | bd6fb6c03948 |
comparison
equal
deleted
inserted
replaced
| 61:1349412c89c6 | 62:14ea4c464435 |
|---|---|
| 47 | 47 |
| 48 print("Report Clonality - Data preperation") | 48 print("Report Clonality - Data preperation") |
| 49 | 49 |
| 50 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="") | 50 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="") |
| 51 | 51 |
| 52 print(paste("nrows: ", nrow(inputdata))) | |
| 53 | |
| 52 setwd(outdir) | 54 setwd(outdir) |
| 53 | 55 |
| 54 # remove weird rows | 56 # remove weird rows |
| 55 inputdata = inputdata[inputdata$Sample != "",] | 57 inputdata = inputdata[inputdata$Sample != "",] |
| 58 | |
| 59 print(paste("nrows: ", nrow(inputdata))) | |
| 56 | 60 |
| 57 #remove the allele from the V,D and J genes | 61 #remove the allele from the V,D and J genes |
| 58 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) | 62 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) |
| 59 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) | 63 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) |
| 60 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) | 64 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) |
| 61 | 65 |
| 66 print(paste("nrows: ", nrow(inputdata))) | |
| 67 | |
| 62 #filter uniques | 68 #filter uniques |
| 63 inputdata.removed = inputdata[NULL,] | 69 inputdata.removed = inputdata[NULL,] |
| 70 | |
| 71 print(paste("nrows: ", nrow(inputdata))) | |
| 64 | 72 |
| 65 inputdata$clonaltype = 1:nrow(inputdata) | 73 inputdata$clonaltype = 1:nrow(inputdata) |
| 66 | 74 |
| 67 #keep track of the count of sequences in samples or samples/replicates for the front page overview | 75 #keep track of the count of sequences in samples or samples/replicates for the front page overview |
| 68 input.sample.count = data.frame(data.table(inputdata)[, list(All=.N), by=c("Sample")]) | 76 input.sample.count = data.frame(data.table(inputdata)[, list(All=.N), by=c("Sample")]) |
| 142 | 150 |
| 143 print("Report Clonality - counting productive/unproductive/unique") | 151 print("Report Clonality - counting productive/unproductive/unique") |
| 144 | 152 |
| 145 #create the table on the overview page with the productive/unique counts per sample/replicate | 153 #create the table on the overview page with the productive/unique counts per sample/replicate |
| 146 #first for sample | 154 #first for sample |
| 147 sample.count = merge(input.sample.count, prod.sample.count, by="Sample") | 155 sample.count = merge(input.sample.count, prod.sample.count, by="Sample", all.x=T) |
| 148 sample.count$perc_prod = round(sample.count$Productive / sample.count$All * 100) | 156 sample.count$perc_prod = round(sample.count$Productive / sample.count$All * 100) |
| 149 sample.count = merge(sample.count, prod.unique.sample.count, by="Sample") | 157 sample.count = merge(sample.count, prod.unique.sample.count, by="Sample", all.x=T) |
| 150 sample.count$perc_prod_un = round(sample.count$Productive_unique / sample.count$All * 100) | 158 sample.count$perc_prod_un = round(sample.count$Productive_unique / sample.count$All * 100) |
| 151 | 159 |
| 152 sample.count = merge(sample.count , unprod.sample.count, by="Sample") | 160 sample.count = merge(sample.count , unprod.sample.count, by="Sample", all.x=T) |
| 161 print(sample.count) | |
| 153 sample.count$perc_unprod = round(sample.count$Unproductive / sample.count$All * 100) | 162 sample.count$perc_unprod = round(sample.count$Unproductive / sample.count$All * 100) |
| 154 sample.count = merge(sample.count, unprod.unique.sample.count, by="Sample") | 163 sample.count = merge(sample.count, unprod.unique.sample.count, by="Sample", all.x=T) |
| 155 sample.count$perc_unprod_un = round(sample.count$Unproductive_unique / sample.count$All * 100) | 164 sample.count$perc_unprod_un = round(sample.count$Unproductive_unique / sample.count$All * 100) |
| 156 | 165 |
| 166 print(sample.count) | |
| 167 | |
| 157 #then sample/replicate | 168 #then sample/replicate |
| 158 rep.count = merge(input.rep.count, prod.rep.count, by=c("Sample", "Replicate")) | 169 rep.count = merge(input.rep.count, prod.rep.count, by=c("Sample", "Replicate"), all.x=T) |
| 159 rep.count$perc_prod = round(rep.count$Productive / rep.count$All * 100) | 170 rep.count$perc_prod = round(rep.count$Productive / rep.count$All * 100) |
| 160 rep.count = merge(rep.count, prod.unique.rep.count, by=c("Sample", "Replicate")) | 171 rep.count = merge(rep.count, prod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T) |
| 161 rep.count$perc_prod_un = round(rep.count$Productive_unique / rep.count$All * 100) | 172 rep.count$perc_prod_un = round(rep.count$Productive_unique / rep.count$All * 100) |
| 162 | 173 |
| 163 rep.count = merge(rep.count, unprod.rep.count, by=c("Sample", "Replicate")) | 174 rep.count = merge(rep.count, unprod.rep.count, by=c("Sample", "Replicate"), all.x=T) |
| 164 rep.count$perc_unprod = round(rep.count$Unproductive / rep.count$All * 100) | 175 rep.count$perc_unprod = round(rep.count$Unproductive / rep.count$All * 100) |
| 165 rep.count = merge(rep.count, unprod.unique.rep.count, by=c("Sample", "Replicate")) | 176 rep.count = merge(rep.count, unprod.unique.rep.count, by=c("Sample", "Replicate"), all.x=T) |
| 166 rep.count$perc_unprod_un = round(rep.count$Unproductive_unique / rep.count$All * 100) | 177 rep.count$perc_unprod_un = round(rep.count$Unproductive_unique / rep.count$All * 100) |
| 167 | 178 |
| 168 rep.count$Sample = paste(rep.count$Sample, rep.count$Replicate, sep="_") | 179 rep.count$Sample = paste(rep.count$Sample, rep.count$Replicate, sep="_") |
| 169 rep.count = rep.count[,names(rep.count) != "Replicate"] | 180 rep.count = rep.count[,names(rep.count) != "Replicate"] |
| 170 | 181 |
| 171 count = rbind(sample.count, rep.count) | 182 count = rbind(sample.count, rep.count) |
| 183 | |
| 184 | |
| 172 | 185 |
| 173 write.table(x=count, file="productive_counting.txt", sep=",",quote=F,row.names=F,col.names=F) | 186 write.table(x=count, file="productive_counting.txt", sep=",",quote=F,row.names=F,col.names=F) |
| 174 | 187 |
| 175 # ---------------------- Frequency calculation for V, D and J ---------------------- | 188 # ---------------------- Frequency calculation for V, D and J ---------------------- |
| 176 | 189 |
| 625 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) | 638 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) |
| 626 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) | 639 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) |
| 627 } | 640 } |
| 628 } | 641 } |
| 629 | 642 |
| 643 bak = PRODF | |
| 644 | |
| 630 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") | 645 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") |
| 631 if(all(imgtcolumns %in% colnames(inputdata))) | 646 if(all(imgtcolumns %in% colnames(inputdata))) |
| 632 { | 647 { |
| 633 print("found IMGT columns, running junction analysis") | 648 print("found IMGT columns, running junction analysis") |
| 634 | 649 |
| 635 if(locus %in% c("IGK","IGL", "TRA", "TRG")){ | 650 if(locus %in% c("IGK","IGL", "TRA", "TRG")){ |
| 636 print("VJ recombination, using N column for junction analysis") | 651 print("VJ recombination, no filtering on absent D") |
| 637 print(names(PRODF)) | 652 } else { |
| 638 print(head(PRODF$N.REGION.nt.nb, 30)) | 653 print("VDJ recombination, using N column for junction analysis") |
| 639 PRODF$N1.REGION.nt.nb = PRODF$N.REGION.nt.nb | 654 fltr = nchar(PRODF$Top.D.Gene) < 4 |
| 655 print(paste("Removing", sum(fltr), "sequences without a identified D")) | |
| 656 PRODF = PRODF[!fltr,] | |
| 640 } | 657 } |
| 641 | 658 |
| 642 num_median = function(x, na.rm) { as.numeric(median(x, na.rm=na.rm)) } | 659 num_median = function(x, na.rm) { as.numeric(median(x, na.rm=na.rm)) } |
| 660 | |
| 661 if(locus %in% c("IGH", "TRB", "TRD")){ | |
| 662 PRODF$new.n = PRODF$N1.REGION.nt.nb + PRODF$N2.REGION.nt.nb | |
| 663 } else { | |
| 664 PRODF$new.n = PRODF$N.REGION.nt.nb | |
| 665 } | |
| 643 | 666 |
| 644 newData = data.frame(data.table(PRODF)[,list(unique=.N, | 667 newData = data.frame(data.table(PRODF)[,list(unique=.N, |
| 645 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 668 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 646 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 669 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
| 647 N1=mean(.SD$N1.REGION.nt.nb, na.rm=T), | 670 N1=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T), |
| 648 P2=mean(.SD$P5D.nt.nb, na.rm=T), | 671 P2=mean(.SD$P5D.nt.nb, na.rm=T), |
| 649 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 672 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), |
| 650 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), | 673 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), |
| 651 P3=mean(.SD$P3D.nt.nb, na.rm=T), | 674 P3=mean(.SD$P3D.nt.nb, na.rm=T), |
| 652 N2=mean(.SD$N2.REGION.nt.nb, na.rm=T), | 675 N2=mean(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
| 653 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 676 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
| 654 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 677 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 655 Total.Del=( mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T) + | 678 Total.Del=mean(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 656 mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T) + | 679 Total.N=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
| 657 mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T) + | 680 Total.P=mean(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)), |
| 658 mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T)), | |
| 659 | |
| 660 Total.N=( mean(.SD$N1.REGION.nt.nb, na.rm=T) + | |
| 661 mean(.SD$N2.REGION.nt.nb, na.rm=T)), | |
| 662 | |
| 663 Total.P=( mean(.SD$P3V.nt.nb, na.rm=T) + | |
| 664 mean(.SD$P5D.nt.nb, na.rm=T) + | |
| 665 mean(.SD$P3D.nt.nb, na.rm=T) + | |
| 666 mean(.SD$P5J.nt.nb, na.rm=T))), | |
| 667 by=c("Sample")]) | 681 by=c("Sample")]) |
| 668 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 682 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 669 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 683 write.table(newData, "junctionAnalysisProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
| 670 | 684 |
| 671 newData = data.frame(data.table(PRODF)[,list(unique=.N, | 685 newData = data.frame(data.table(PRODF)[,list(unique=.N, |
| 672 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 686 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 673 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 687 P1=num_median(.SD$P3V.nt.nb, na.rm=T), |
| 674 N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T), | 688 N1=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T), |
| 675 P2=num_median(.SD$P5D.nt.nb, na.rm=T), | 689 P2=num_median(.SD$P5D.nt.nb, na.rm=T), |
| 676 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 690 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), |
| 677 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), | 691 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), |
| 678 P3=num_median(.SD$P3D.nt.nb, na.rm=T), | 692 P3=num_median(.SD$P3D.nt.nb, na.rm=T), |
| 679 N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T), | 693 N2=num_median(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
| 680 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 694 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
| 681 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 695 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 682 Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb, | 696 Total.Del=num_median(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 683 .SD$X5D.REGION.trimmed.nt.nb, | 697 Total.N=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
| 684 .SD$X3D.REGION.trimmed.nt.nb, | 698 Total.P=num_median(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)), |
| 685 .SD$X5J.REGION.trimmed.nt.nb), na.rm=T), | |
| 686 Total.N=num_median( c(.SD$N1.REGION.nt.nb, | |
| 687 .SD$N2.REGION.nt.nb), na.rm=T), | |
| 688 Total.P=num_median(c(.SD$P3V.nt.nb, | |
| 689 .SD$P5D.nt.nb, | |
| 690 .SD$P3D.nt.nb, | |
| 691 .SD$P5J.nt.nb), na.rm=T)), | |
| 692 by=c("Sample")]) | 699 by=c("Sample")]) |
| 693 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 700 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 694 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 701 write.table(newData, "junctionAnalysisProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
| 695 | 702 |
| 696 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 703 newData = data.frame(data.table(UNPROD)[,list(unique=.N, |
| 697 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 704 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 698 P1=mean(.SD$P3V.nt.nb, na.rm=T), | 705 P1=mean(.SD$P3V.nt.nb, na.rm=T), |
| 699 N1=mean(.SD$N1.REGION.nt.nb, na.rm=T), | 706 N1=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T), |
| 700 P2=mean(.SD$P5D.nt.nb, na.rm=T), | 707 P2=mean(.SD$P5D.nt.nb, na.rm=T), |
| 701 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 708 DEL.DH=mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), |
| 702 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), | 709 DH.DEL=mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), |
| 703 P3=mean(.SD$P3D.nt.nb, na.rm=T), | 710 P3=mean(.SD$P3D.nt.nb, na.rm=T), |
| 704 N2=mean(.SD$N2.REGION.nt.nb, na.rm=T), | 711 N2=mean(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
| 705 P4=mean(.SD$P5J.nt.nb, na.rm=T), | 712 P4=mean(.SD$P5J.nt.nb, na.rm=T), |
| 706 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 713 DEL.JH=mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 707 Total.Del=(mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T) + | 714 Total.Del=mean(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 708 mean(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T) + | 715 Total.N=mean(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
| 709 mean(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T) + | 716 Total.P=mean(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)), |
| 710 mean(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T)), | |
| 711 Total.N=( mean(.SD$N1.REGION.nt.nb, na.rm=T) + | |
| 712 mean(.SD$N2.REGION.nt.nb, na.rm=T)), | |
| 713 Total.P=( mean(.SD$P3V.nt.nb, na.rm=T) + | |
| 714 mean(.SD$P5D.nt.nb, na.rm=T) + | |
| 715 mean(.SD$P3D.nt.nb, na.rm=T) + | |
| 716 mean(.SD$P5J.nt.nb, na.rm=T))), | |
| 717 by=c("Sample")]) | 717 by=c("Sample")]) |
| 718 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 718 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 719 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 719 write.table(newData, "junctionAnalysisUnProd_mean.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
| 720 | 720 |
| 721 newData = data.frame(data.table(UNPROD)[,list(unique=.N, | 721 newData = data.frame(data.table(UNPROD)[,list(unique=.N, |
| 722 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), | 722 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T), |
| 723 P1=num_median(.SD$P3V.nt.nb, na.rm=T), | 723 P1=num_median(.SD$P3V.nt.nb, na.rm=T), |
| 724 N1=num_median(.SD$N1.REGION.nt.nb, na.rm=T), | 724 N1=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb, na.rm=T), |
| 725 P2=num_median(.SD$P5D.nt.nb, na.rm=T), | 725 P2=num_median(.SD$P5D.nt.nb, na.rm=T), |
| 726 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), | 726 DEL.DH=num_median(.SD$X5D.REGION.trimmed.nt.nb, na.rm=T), |
| 727 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), | 727 DH.DEL=num_median(.SD$X3D.REGION.trimmed.nt.nb, na.rm=T), |
| 728 P3=num_median(.SD$P3D.nt.nb, na.rm=T), | 728 P3=num_median(.SD$P3D.nt.nb, na.rm=T), |
| 729 N2=num_median(.SD$N2.REGION.nt.nb, na.rm=T), | 729 N2=num_median(.SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
| 730 P4=num_median(.SD$P5J.nt.nb, na.rm=T), | 730 P4=num_median(.SD$P5J.nt.nb, na.rm=T), |
| 731 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), | 731 DEL.JH=num_median(.SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 732 Total.Del=num_median(c(.SD$X3V.REGION.trimmed.nt.nb, | 732 Total.Del=num_median(.SD$X3V.REGION.trimmed.nt.nb + .SD$X5D.REGION.trimmed.nt.nb + .SD$X3D.REGION.trimmed.nt.nb + .SD$X5J.REGION.trimmed.nt.nb, na.rm=T), |
| 733 .SD$X5D.REGION.trimmed.nt.nb, | 733 Total.N=num_median(.SD$N.REGION.nt.nb + .SD$N1.REGION.nt.nb + .SD$N2.REGION.nt.nb + .SD$N3.REGION.nt.nb + .SD$N4.REGION.nt.nb, na.rm=T), |
| 734 .SD$X3D.REGION.trimmed.nt.nb, | 734 Total.P=num_median(.SD$P3V.nt.nb + .SD$P5D.nt.nb + .SD$P3D.nt.nb + .SD$P5J.nt.nb, na.rm=T)), |
| 735 .SD$X5J.REGION.trimmed.nt.nb), na.rm=T), | |
| 736 Total.N=num_median( c(.SD$N1.REGION.nt.nb, | |
| 737 .SD$N2.REGION.nt.nb), na.rm=T), | |
| 738 Total.P=num_median(c(.SD$P3V.nt.nb, | |
| 739 .SD$P5D.nt.nb, | |
| 740 .SD$P3D.nt.nb, | |
| 741 .SD$P5J.nt.nb), na.rm=T)), | |
| 742 by=c("Sample")]) | 735 by=c("Sample")]) |
| 743 | 736 |
| 744 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) | 737 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1) |
| 745 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) | 738 write.table(newData, "junctionAnalysisUnProd_median.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) |
| 746 } | 739 } |
| 740 | |
| 741 PRODF = bak | |
| 747 | 742 |
| 748 # ---------------------- AA composition in CDR3 ---------------------- | 743 # ---------------------- AA composition in CDR3 ---------------------- |
| 749 | 744 |
| 750 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] | 745 AACDR3 = PRODF[,c("Sample", "CDR3.Seq")] |
| 751 | 746 |
