comparison sequence_overview.r @ 100:ff5be711382b draft

Uploaded
author davidvanzessen
date Fri, 17 Jun 2016 05:36:32 -0400
parents 86206431cbb0
children e6bc976760d4
comparison
Legend: equal, deleted, inserted, replaced
Left column: revision 99:86206431cbb0 · Right column: revision 100:ff5be711382b
1 library(reshape2) 1 library(reshape2)
2 2
3 args <- commandArgs(trailingOnly = TRUE) 3 args <- commandArgs(trailingOnly = TRUE)
4 4
5 input.file = args[1] 5 before.unique.file = args[1]
6 outputdir = args[2] 6 merged.file = args[2]
7 gene.classes = unlist(strsplit(args[3], ",")) 7 outputdir = args[3]
8 hotspot.analysis.sum.file = args[4] 8 gene.classes = unlist(strsplit(args[4], ","))
9 hotspot.analysis.sum.file = args[5]
9 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") 10 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/")
10 NTsum.file = paste(outputdir, "ntsum.txt", sep="/") 11 NTsum.file = paste(outputdir, "ntsum.txt", sep="/")
11 main.html = "index.html" 12 main.html = "index.html"
12 13
13 setwd(outputdir) 14 setwd(outputdir)
14 15
15 merged = read.table(input.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") 16 before.unique = read.table(before.unique.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
17 merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
16 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") 18 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="")
17 19
18 merged$seq_conc = paste(merged$CDR1.IMGT.seq, merged$FR2.IMGT.seq, merged$CDR2.IMGT.seq, merged$FR3.IMGT.seq, merged$CDR3.IMGT.seq) 20 before.unique = before.unique[!grepl("unmatched", before.unique$best_match),]
19 21
20 IDs = merged[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] 22 before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq)
23
24 IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")]
21 IDs$best_match = as.character(IDs$best_match) 25 IDs$best_match = as.character(IDs$best_match)
22 26
23 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")]) 27 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")])
24 28
25 dat = data.frame(table(merged$seq_conc)) 29 dat = data.frame(table(before.unique$seq_conc))
26 #dat = data.frame(table(merged$seq_conc, merged$Functionality)) 30 #dat = data.frame(table(merged$seq_conc, merged$Functionality))
27 31
28 #dat = dat[dat$Freq > 1,] 32 #dat = dat[dat$Freq > 1,]
29 33
30 #names(dat) = c("seq_conc", "Functionality", "Freq") 34 #names(dat) = c("seq_conc", "Functionality", "Freq")
136 print(paste("Count that should match 'matched' sequences:", matched)) 140 print(paste("Count that should match 'matched' sequences:", matched))
137 141
138 #ACGT overview 142 #ACGT overview
139 143
140 NToverview = merged 144 NToverview = merged
145
141 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq, sep="_") 146 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq, sep="_")
142 147
143 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) 148 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq))
144 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) 149 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq))
145 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) 150 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq))