Mercurial > repos > davidvanzessen > mutation_analysis
diff sequence_overview.r @ 100:ff5be711382b draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 17 Jun 2016 05:36:32 -0400 |
parents | 86206431cbb0 |
children | e6bc976760d4 |
line wrap: on
line diff
--- a/sequence_overview.r Thu Jun 16 10:01:54 2016 -0400
+++ b/sequence_overview.r Fri Jun 17 05:36:32 2016 -0400
@@ -2,27 +2,31 @@
 
 args <- commandArgs(trailingOnly = TRUE)
 
-input.file = args[1]
-outputdir = args[2]
-gene.classes = unlist(strsplit(args[3], ","))
-hotspot.analysis.sum.file = args[4]
+before.unique.file = args[1]
+merged.file = args[2]
+outputdir = args[3]
+gene.classes = unlist(strsplit(args[4], ","))
+hotspot.analysis.sum.file = args[5]
 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/")
 NTsum.file = paste(outputdir, "ntsum.txt", sep="/")
 main.html = "index.html"
 
 setwd(outputdir)
 
-merged = read.table(input.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
+before.unique = read.table(before.unique.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
+merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="")
 
-merged$seq_conc = paste(merged$CDR1.IMGT.seq, merged$FR2.IMGT.seq, merged$CDR2.IMGT.seq, merged$FR3.IMGT.seq, merged$CDR3.IMGT.seq)
+before.unique = before.unique[!grepl("unmatched", before.unique$best_match),]
 
-IDs = merged[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
+before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq)
+
+IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
 IDs$best_match = as.character(IDs$best_match)
 
 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")])
 
-dat = data.frame(table(merged$seq_conc))
+dat = data.frame(table(before.unique$seq_conc))
 #dat = data.frame(table(merged$seq_conc, merged$Functionality))
 
 #dat = dat[dat$Freq > 1,]
@@ -138,6 +142,7 @@
 #ACGT overview
 NToverview = merged
+
 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq, sep="_")
 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq))