Mercurial > repos > davidvanzessen > mutation_analysis
comparison sequence_overview.r @ 100:ff5be711382b draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 17 Jun 2016 05:36:32 -0400 |
parents | 86206431cbb0 |
children | e6bc976760d4 |
comparison
equal
deleted
inserted
replaced
99:86206431cbb0 | 100:ff5be711382b |
---|---|
1 library(reshape2) | 1 library(reshape2) |
2 | 2 |
3 args <- commandArgs(trailingOnly = TRUE) | 3 args <- commandArgs(trailingOnly = TRUE) |
4 | 4 |
5 input.file = args[1] | 5 before.unique.file = args[1] |
6 outputdir = args[2] | 6 merged.file = args[2] |
7 gene.classes = unlist(strsplit(args[3], ",")) | 7 outputdir = args[3] |
8 hotspot.analysis.sum.file = args[4] | 8 gene.classes = unlist(strsplit(args[4], ",")) |
| | 9 hotspot.analysis.sum.file = args[5] |
9 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") | 10 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") |
10 NTsum.file = paste(outputdir, "ntsum.txt", sep="/") | 11 NTsum.file = paste(outputdir, "ntsum.txt", sep="/") |
11 main.html = "index.html" | 12 main.html = "index.html" |
12 | 13 |
13 setwd(outputdir) | 14 setwd(outputdir) |
14 | 15 |
15 merged = read.table(input.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") | 16 before.unique = read.table(before.unique.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") |
| | 17 merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") |
16 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") | 18 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") |
17 | 19 |
18 merged$seq_conc = paste(merged$CDR1.IMGT.seq, merged$FR2.IMGT.seq, merged$CDR2.IMGT.seq, merged$FR3.IMGT.seq, merged$CDR3.IMGT.seq) | 20 before.unique = before.unique[!grepl("unmatched", before.unique$best_match),] |
19 | 21 |
20 IDs = merged[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] | 22 before.unique$seq_conc = paste(before.unique$CDR1.IMGT.seq, before.unique$FR2.IMGT.seq, before.unique$CDR2.IMGT.seq, before.unique$FR3.IMGT.seq, before.unique$CDR3.IMGT.seq) |
| | 23 |
| | 24 IDs = before.unique[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] |
21 IDs$best_match = as.character(IDs$best_match) | 25 IDs$best_match = as.character(IDs$best_match) |
22 | 26 |
23 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")]) | 27 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")]) |
24 | 28 |
25 dat = data.frame(table(merged$seq_conc)) | 29 dat = data.frame(table(before.unique$seq_conc)) |
26 #dat = data.frame(table(merged$seq_conc, merged$Functionality)) | 30 #dat = data.frame(table(merged$seq_conc, merged$Functionality)) |
27 | 31 |
28 #dat = dat[dat$Freq > 1,] | 32 #dat = dat[dat$Freq > 1,] |
29 | 33 |
30 #names(dat) = c("seq_conc", "Functionality", "Freq") | 34 #names(dat) = c("seq_conc", "Functionality", "Freq") |
136 print(paste("Count that should match 'matched' sequences:", matched)) | 140 print(paste("Count that should match 'matched' sequences:", matched)) |
137 | 141 |
138 #ACGT overview | 142 #ACGT overview |
139 | 143 |
140 NToverview = merged | 144 NToverview = merged |
| | 145 |
141 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq, sep="_") | 146 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq, sep="_") |
142 | 147 |
143 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) | 148 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) |
144 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) | 149 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) |
145 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) | 150 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) |