# Command-line interface: six positional arguments, all file paths.
#   1. summaryfile           tab-separated summary table (input)
#   2. mutationanalysisfile  tab-separated mutation analysis table (input)
#   3. mutationstatsfile     tab-separated mutation statistics table (input)
#   4. hotspotsfile          tab-separated hotspots table (input)
#   5. output                path the merged and filtered table is written to
#   6. unmatchedfile         path the table of unmatched sequences is written to
args <- commandArgs(trailingOnly = TRUE)

# Fail fast with a usage message: a missing argument would otherwise surface
# much later as an obscure error on an NA file name, possibly after part of
# the output has already been written.
if (length(args) < 6) {
  stop(
    "Expected 6 arguments: <summary> <mutationanalysis> <mutationstats> ",
    "<hotspots> <output> <unmatched>; got ", length(args),
    call. = FALSE
  )
}

summaryfile <- args[1]
mutationanalysisfile <- args[2]
mutationstatsfile <- args[3]
hotspotsfile <- args[4]
output <- args[5]
unmatchedfile <- args[6]

# Load the four tab-separated input tables. Every file is read with a header
# row, short rows are padded (fill), and text columns are kept as character
# vectors rather than factors.
read_input_table <- function(path) {
  read.table(path, header = TRUE, sep = "\t", fill = TRUE,
             stringsAsFactors = FALSE)
}

summ <- read_input_table(summaryfile)
mutationanalysis <- read_input_table(mutationanalysisfile)
mutationstats <- read_input_table(mutationstatsfile)
hotspots <- read_input_table(hotspotsfile)


# Discard sequences whose Functionality is reported as "No results".
summ <- summ[summ$Functionality != "No results", ]

# Split the remaining sequences on the two hit percentages:
#   - kept:      both percentages are at least 70
#   - unmatched: both percentages are below 70
# NOTE(review): a row with exactly one percentage below 70 ends up in neither
# table -- confirm that silently dropping those rows is intended.
chunk_hit <- summ$chunk_hit_percentage
nt_hit <- summ$nt_hit_percentage
passes_filter <- chunk_hit >= 70 & nt_hit >= 70
fails_filter <- chunk_hit < 70 & nt_hit < 70

# Only the columns describing the failed match are reported for unmatched rows.
unmatched <- summ[fails_filter, ]
unmatched <- unmatched[, c("Sequence.ID", "chunk_hit_percentage",
                           "nt_hit_percentage", "start_locations",
                           "best_match")]

summ <- summ[passes_filter, ]

# Nothing left to merge: abort before any output is written.
if (length(summ$Sequence.ID) < 1) {
  stop("No data remaining after filter")
}

# Inner-join `extra` onto `base` by the key column, bringing over only the
# columns of `extra` that `base` does not already have.
#
# The key is protected by name rather than by its position in `base`, so the
# join no longer depends on column order. `drop = FALSE` keeps the subset a
# data frame even when a single column survives.
merge_new_columns <- function(base, extra, key = "Sequence.ID") {
  already_present <- setdiff(names(base), key)
  keep <- !(names(extra) %in% already_present)
  merge(base, extra[, keep, drop = FALSE], by = key)
}

# Build one wide table per sequence: summary + mutation analysis +
# mutation statistics + hotspots.
result <- merge_new_columns(summ, mutationanalysis)
result <- merge_new_columns(result, mutationstats)
result <- merge_new_columns(result, hotspots)


# Mutation-count columns that must be turned into plain numbers.
cleanup_columns <- c("FR1.IMGT.Nb.of.mutations",
                     "CDR1.IMGT.Nb.of.mutations",
                     "FR2.IMGT.Nb.of.mutations",
                     "CDR2.IMGT.Nb.of.mutations",
                     "FR3.IMGT.Nb.of.mutations")

for (col in cleanup_columns) {
  # Strip any parenthesised text from the value, then convert to numeric.
  counts <- as.numeric(gsub("\\(.*\\)", "", result[, col]))
  # Treat values that could not be converted as 0. Only this column is
  # touched: the previous `result[is.na(...), ] = 0` overwrote every column
  # of the affected rows (including Sequence.ID) with 0.
  counts[is.na(counts)] <- 0
  result[, col] <- counts
}

# Reduce a "GENE and allele" value to the bare gene name: remove a leading
# "Homsap " prefix, then everything from the first "*" onwards.
strip_allele <- function(gene) {
  gsub("[*].*", "", gsub("^Homsap ", "", gene))
}

result$VGene <- strip_allele(result$V.GENE.and.allele)
result$JGene <- strip_allele(result$J.GENE.and.allele)

# Sum of the mutation counts over the five cleaned-up count columns.
total_mutations <- result$FR1.IMGT.Nb.of.mutations +
  result$CDR1.IMGT.Nb.of.mutations +
  result$FR2.IMGT.Nb.of.mutations +
  result$CDR2.IMGT.Nb.of.mutations +
  result$FR3.IMGT.Nb.of.mutations

# Temporary de-duplication key: junction, V gene, J gene, mutation total and
# best match. Rows sharing a key are considered duplicates.
result$past <- paste(result$AA.JUNCTION, result$VGene, result$JGene,
                     total_mutations, result$best_match)

# Keep the first row for every key and drop the helper column again.
result <- result[!duplicated(result$past), names(result) != "past"]

# Write a data frame as a tab-separated file with a header row, without
# quoting and without row names.
write_tsv <- function(table, path) {
  write.table(x = table, file = path, sep = "\t", quote = FALSE,
              row.names = FALSE, col.names = TRUE)
}

write_tsv(result, output)
write_tsv(unmatched, unmatchedfile)
