diff sequence_overview.r @ 100:ff5be711382b draft

Uploaded
author davidvanzessen
date Fri, 17 Jun 2016 05:36:32 -0400
parents 86206431cbb0
children e6bc976760d4
line wrap: on
line diff
--- a/sequence_overview.r	Thu Jun 16 10:01:54 2016 -0400
+++ b/sequence_overview.r	Fri Jun 17 05:36:32 2016 -0400
@@ -2,27 +2,31 @@
 
 args <- commandArgs(trailingOnly = TRUE)
 
-input.file = args[1]
-outputdir = args[2]
-gene.classes = unlist(strsplit(args[3], ","))
-hotspot.analysis.sum.file = args[4]
+before.unique.file = args[1]
+merged.file = args[2]
+outputdir = args[3]
+gene.classes = unlist(strsplit(args[4], ","))
+hotspot.analysis.sum.file = args[5]
 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/")
 NTsum.file = paste(outputdir, "ntsum.txt", sep="/")
 main.html = "index.html"
 
 setwd(outputdir)
 
-merged = read.table(input.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
+before.unique = read.table(before.unique.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
+merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="")
 
-merged$seq_conc = paste(merged$CDR1.IMGT.seq, merged$FR2.IMGT.seq, merged$CDR2.IMGT.seq, merged$FR3.IMGT.seq, merged$CDR3.IMGT.seq)
+before.unique = before.unique[!grepl("unmatched", before.unique$best_match),]
 
-IDs = merged[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
+before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq)
+
+IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
 IDs$best_match = as.character(IDs$best_match)
 
 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")])
 
-dat = data.frame(table(merged$seq_conc))
+dat = data.frame(table(before.unique$seq_conc))
 #dat = data.frame(table(merged$seq_conc, merged$Functionality))
 
 #dat = dat[dat$Freq > 1,]
@@ -138,6 +142,7 @@
 #ACGT overview
 
 NToverview = merged
+
 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq, sep="_")
 
 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq))