# Merge a "gapped" table with the AA.JUNCTION column of a "summary" table on
# Sequence.ID, derive allele-free V/D/J gene names, and keep only the first
# row for every distinct combination of the user-selected columns.
#
# Usage: Rscript <script> <summaryfile> <gappedfile> <selection> <output>
#   summaryfile  tab-separated file with at least the columns Sequence.ID and
#                AA.JUNCTION
#   gappedfile   tab-separated file with Sequence.ID and the
#                V/D/J.GENE.and.allele columns
#   selection    comma-separated column names of the merged table (including
#                the derived VGene, DGene, JGene) that define a duplicate
#   output       path of the tab-separated result file
arg <- commandArgs(TRUE)
if (length(arg) < 4L) {
  stop(
    "expected 4 arguments: <summaryfile> <gappedfile> <selection> <output>",
    call. = FALSE
  )
}
summaryfile <- arg[1]
gappedfile <- arg[2]
selection <- arg[3]
output <- arg[4]
print(paste("-----", selection, "------"))

# Strip the leading "Homsap " prefix and everything from the first "*"
# onwards (the allele suffix) from gene-and-allele strings.
strip_gene_name <- function(x) {
  x <- gsub("^Homsap ", "", x)
  gsub("[*].*", "", x)
}

# NOTE(review): read.table's default quote ("\"'") and comment.char ("#") are
# left unchanged here; if the inputs can contain apostrophes or '#' inside
# fields, quote = "" / comment.char = "" may be needed -- confirm against
# real input files.
summarydat <- read.table(
  summaryfile,
  header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE
)[, c("Sequence.ID", "AA.JUNCTION")]
gappeddat <- read.table(
  gappedfile,
  header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE
)
# Diagnostic previews of the inputs (printed explicitly so they do not rely on
# top-level auto-printing).
print(head(summarydat))
print(head(gappeddat))

dat <- merge(gappeddat, summarydat, by = "Sequence.ID")
print(head(dat))

dat$VGene <- strip_gene_name(dat$V.GENE.and.allele)
dat$DGene <- strip_gene_name(dat$D.GENE.and.allele)
dat$JGene <- strip_gene_name(dat$J.GENE.and.allele)

# Resolve the selected columns; surrounding whitespace is tolerated so that
# "VGene, JGene" works the same as "VGene,JGene".
selection_cols <- trimws(unlist(strsplit(selection, ",")))
if (length(selection_cols) == 0L) {
  stop("selection must name at least one column", call. = FALSE)
}
missing_cols <- setdiff(selection_cols, names(dat))
if (length(missing_cols) > 0L) {
  stop(
    "selection refers to unknown column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Build one ":"-joined key per row from the selected columns; the key column
# ("past") is kept in the output, as before.
dat$past <- do.call(paste, c(dat[selection_cols], sep = ":"))

# Keep the first occurrence of every key.
dat <- dat[!duplicated(dat$past), ]

write.table(
  x = dat, file = output, sep = "\t",
  quote = FALSE, row.names = FALSE, col.names = TRUE
)
