Mercurial > repos > davidvanzessen > mutation_analysis
comparison sequence_overview.r @ 90:f0e8dac22c6e draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 01 Jun 2016 05:03:24 -0400 |
parents | 480fdd383fdb |
children | 5e237c243088 |
comparison
equal
deleted
inserted
replaced
89:480fdd383fdb | 90:f0e8dac22c6e |
---|---|
1 library(reshape2) | 1 library(reshape2) |
2 | 2 |
3 args <- commandArgs(trailingOnly = TRUE) | 3 args <- commandArgs(trailingOnly = TRUE) |
4 | 4 |
5 gene.matches = args[1] | 5 input.file = args[1] |
6 sequence.file = args[2] | 6 outputdir = args[2] |
7 merged.file = args[3] | 7 gene.classes = unlist(strsplit(args[3], ",")) |
8 outputdir = args[4] | 8 hotspot.analysis.sum.file = args[4] |
9 gene.classes = unlist(strsplit(args[5], ",")) | |
10 hotspot.analysis.sum.file = args[6] | |
11 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") | 9 NToverview.file = paste(outputdir, "ntoverview.txt", sep="/") |
12 NTsum.file = paste(outputdir, "ntsum.txt", sep="/") | 10 NTsum.file = paste(outputdir, "ntsum.txt", sep="/") |
13 main.html = "index.html" | 11 main.html = "index.html" |
14 | 12 |
15 setwd(outputdir) | 13 setwd(outputdir) |
16 | 14 |
17 genes = read.table(gene.matches, header=T, sep="\t", fill=T) | 15 merged = read.table(input.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") |
18 sequences = read.table(sequence.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") | |
19 merged = read.table(merged.file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") | |
20 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") | 16 hotspot.analysis.sum = read.table(hotspot.analysis.sum.file, header=F, sep=",", fill=T, stringsAsFactors=F, quote="") |
21 | 17 |
22 dat = merge(sequences, genes, by="Sequence.ID") | 18 merged$seq_conc = paste(merged$CDR1.IMGT.seq, merged$FR2.IMGT.seq, merged$CDR2.IMGT.seq, merged$FR3.IMGT.seq, merged$CDR3.IMGT.seq) |
23 | 19 |
24 dat = dat[dat$Sequence.ID %in% merged$Sequence.ID,] | 20 IDs = merged[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] |
25 | |
26 dat$seq_conc = paste(dat$CDR1.IMGT, dat$FR2.IMGT, dat$CDR2.IMGT, dat$FR3.IMGT, dat$CDR3.IMGT) | |
27 #dat$seq_conc = paste(dat$CDR1.IMGT, dat$CDR2.IMGT, dat$CDR3.IMGT) | |
28 | |
29 IDs = dat[,c("Sequence.ID", "seq_conc", "best_match", "Functionality")] | |
30 IDs$best_match = as.character(IDs$best_match) | 21 IDs$best_match = as.character(IDs$best_match) |
31 | 22 |
32 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")]) | 23 #dat = data.frame(data.table(dat)[, list(freq=.N), by=c("best_match", "seq_conc")]) |
33 | 24 |
34 dat = data.frame(table(dat$seq_conc, dat$Functionality)) | 25 dat = data.frame(table(merged$seq_conc, merged$Functionality)) |
35 | 26 |
36 #dat = dat[dat$Freq > 1,] | 27 #dat = dat[dat$Freq > 1,] |
37 | 28 |
38 names(dat) = c("seq_conc", "Functionality", "Freq") | 29 names(dat) = c("seq_conc", "Functionality", "Freq") |
39 | 30 |
122 | 113 |
123 #ACGT overview | 114 #ACGT overview |
124 | 115 |
125 | 116 |
126 | 117 |
127 NToverview = genes[,c("Sequence.ID", "best_match")] | 118 NToverview = merged |
128 sequences$seq = paste(sequences$CDR1.IMGT, sequences$FR2.IMGT, sequences$CDR2.IMGT, sequences$FR3.IMGT, sep="_") | 119 NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq, sep="_") |
129 | |
130 NToverview = merge(NToverview, sequences[,c("Sequence.ID", "seq")], by="Sequence.ID") | |
131 | |
132 NToverview = NToverview[NToverview$Sequence.ID %in% merged$Sequence.ID,] | |
133 | 120 |
134 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) | 121 NToverview$A = nchar(gsub("[^Aa]", "", NToverview$seq)) |
135 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) | 122 NToverview$C = nchar(gsub("[^Cc]", "", NToverview$seq)) |
136 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) | 123 NToverview$G = nchar(gsub("[^Gg]", "", NToverview$seq)) |
137 NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq)) | 124 NToverview$T = nchar(gsub("[^Tt]", "", NToverview$seq)) |
125 | |
126 print(sum(colSums(NToverview[,c("A", "C", "T", "G")]))) | |
138 | 127 |
139 #Nsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T)) | 128 #Nsum = data.frame(Sequence.ID="-", best_match="Sum", seq="-", A = sum(NToverview$A), C = sum(NToverview$C), G = sum(NToverview$G), T = sum(NToverview$T)) |
140 | 129 |
141 #NToverview = rbind(NToverview, NTsum) | 130 #NToverview = rbind(NToverview, NTsum) |
142 | 131 |