Mercurial > repos > davidvanzessen > baseline_bayesian_estimation
comparison filter.r @ 5:d8de51314d3f draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Thu, 14 Aug 2014 07:17:26 -0400 |
| parents | 38c038c17d0c |
| children | 82016fd934c8 |
comparison
equal
deleted
inserted
replaced
| 4:1726dc8f3e0a | 5:d8de51314d3f |
|---|---|
| 1 arg = commandArgs(TRUE) | 1 arg = commandArgs(TRUE) |
| 2 summaryfile = arg[1] | 2 summaryfile = arg[1] |
| 3 gappedfile = arg[2] | 3 gappedfile = arg[2] |
| 4 selection = arg[3] | 4 selection = arg[3] |
| 5 output = arg[4] | 5 output = arg[4] |
| 6 print(paste("-----", selection, "------")) | 6 print(paste("selection = ", selection)) |
| 7 | 7 |
| 8 summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F)[,c("Sequence.ID", "AA.JUNCTION")] | 8 |
| | 9 summarydat = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F) |
| 9 gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F) | 10 gappeddat = read.table(gappedfile, header=T, sep="\t", fill=T, stringsAsFactors=F) |
| 10 head(summarydat) | |
| 11 head(gappeddat) | |
| 12 | 11 |
| 13 dat = merge(gappeddat, summarydat, by="Sequence.ID") | 12 |
| 14 head(dat) | 13 |
| | 14 #dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T)) |
| | 15 |
| | 16 dat = cbind(gappeddat, summarydat$AA.JUNCTION) |
| | 17 |
| | 18 colnames(dat)[length(dat)] = "AA.JUNCTION" |
| 15 | 19 |
| 16 dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele) | 20 dat$VGene = gsub("^Homsap ", "", dat$V.GENE.and.allele) |
| 17 dat$VGene = gsub("[*].*", "", dat$VGene) | 21 dat$VGene = gsub("[*].*", "", dat$VGene) |
| 18 | 22 |
| 19 dat$DGene = gsub("^Homsap ", "", dat$D.GENE.and.allele) | 23 dat$DGene = gsub("^Homsap ", "", dat$D.GENE.and.allele) |
| 20 dat$DGene = gsub("[*].*", "", dat$DGene) | 24 dat$DGene = gsub("[*].*", "", dat$DGene) |
| 21 | 25 |
| 22 dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele) | 26 dat$JGene = gsub("^Homsap ", "", dat$J.GENE.and.allele) |
| 23 dat$JGene = gsub("[*].*", "", dat$JGene) | 27 dat$JGene = gsub("[*].*", "", dat$JGene) |
| 24 | 28 |
| | 29 print(str(dat)) |
| | 30 |
| 25 dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":")) | 31 dat$past = do.call(paste, c(dat[unlist(strsplit(selection, ","))], sep = ":")) |
| 26 | 32 |
| 27 dat = dat[!duplicated(dat$past), ] | 33 dat = dat[!duplicated(dat$past), ] |
| 28 | 34 |
| 29 write.table(x=dat, file=output, sep="\t",quote=F,row.names=F,col.names=T) | 35 write.table(x=dat, file=output, sep="\t",quote=F,row.names=F,col.names=T) |
