annotate baseline/filter.r @ 127:afb0937ec0dc draft default tip

Uploaded
author davidvanzessen
date Tue, 13 Sep 2016 08:55:05 -0400
parents e7b550d52eb7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
114
e7b550d52eb7 Uploaded
davidvanzessen
parents:
diff changeset
# Filter and de-duplicate a gapped sequence table, annotated with the
# AA.JUNCTION column taken from a matching summary table.
#
# Usage: Rscript filter.r <summaryfile> <gappedfile> <selection> <output>
#   summaryfile  tab-separated file with a header row; must contain the
#                column AA.JUNCTION
#   gappedfile   tab-separated file with a header row; must contain the
#                columns V.GENE.and.allele, D.GENE.and.allele,
#                J.GENE.and.allele and Functionality
#   selection    comma-separated list of column names; rows that share the
#                same combination of these columns are treated as duplicates
#   output       path of the tab-separated file that is written
#
# NOTE(review): the two inputs are combined by row position, not by a key,
# so they are assumed to list the same sequences in the same order -- confirm
# against the caller.

arg <- commandArgs(TRUE)
if (length(arg) < 4) {
  stop("usage: filter.r <summaryfile> <gappedfile> <selection> <output>",
       call. = FALSE)
}

summaryfile <- arg[1]
gappedfile <- arg[2]
selection <- arg[3]
output <- arg[4]
print(paste("selection = ", selection))

summarydat <- read.table(summaryfile, header = TRUE, sep = "\t", fill = TRUE,
                         stringsAsFactors = FALSE)
gappeddat <- read.table(gappedfile, header = TRUE, sep = "\t", fill = TRUE,
                        stringsAsFactors = FALSE)

# Fail early on missing columns. Without this check a missing AA.JUNCTION
# makes cbind() add nothing, and the last gapped column would be silently
# renamed to "AA.JUNCTION" instead.
if (!"AA.JUNCTION" %in% colnames(summarydat)) {
  stop("summary file has no AA.JUNCTION column", call. = FALSE)
}
required_cols <- c("V.GENE.and.allele", "D.GENE.and.allele",
                   "J.GENE.and.allele", "Functionality")
missing_cols <- setdiff(required_cols, colnames(gappeddat))
if (length(missing_cols) > 0) {
  stop("gapped file is missing column(s): ",
       paste(missing_cols, collapse = ", "), call. = FALSE)
}

# The tables are joined by position, so a row-count mismatch means the
# AA.JUNCTION values would be attached to the wrong sequences (cbind() may
# even recycle a shorter column without complaint).
if (nrow(summarydat) != nrow(gappeddat)) {
  stop("summary file has ", nrow(summarydat), " rows but gapped file has ",
       nrow(gappeddat), call. = FALSE)
}

#dat = data.frame(merge(gappeddat, summarydat, by="Sequence.ID", all.x=T))

# Naming the argument gives the appended column its name directly.
dat <- cbind(gappeddat, AA.JUNCTION = summarydat[["AA.JUNCTION"]])

# Reduce a "gene and allele" value to the bare gene name: drop a leading
# "Homsap " and everything from the first "*" onwards.
strip_gene <- function(x) {
  gsub("[*].*", "", gsub("^Homsap ", "", x))
}

dat$VGene <- strip_gene(dat[["V.GENE.and.allele"]])
dat$DGene <- strip_gene(dat[["D.GENE.and.allele"]])
dat$JGene <- strip_gene(dat[["J.GENE.and.allele"]])

#print(str(dat))

selected <- unlist(strsplit(selection, ","))
if (length(selected) == 0) {
  stop("selection must name at least one column", call. = FALSE)
}
unknown_cols <- setdiff(selected, colnames(dat))
if (length(unknown_cols) > 0) {
  stop("selection refers to unknown column(s): ",
       paste(unknown_cols, collapse = ", "), call. = FALSE)
}

# Build one ":"-joined key per row from the selected columns and keep only
# the first row for each key.
dat$past <- do.call(paste, c(dat[selected], sep = ":"))
dat <- dat[!duplicated(dat$past), , drop = FALSE]

# Drop rows without results and unproductive rows. This runs after the
# de-duplication, as in the original script. %in% never returns NA, so a
# missing Functionality value keeps its row intact instead of turning it
# into an all-NA row.
keep <- !(dat[["Functionality"]] %in% c("No results", "unproductive"))
dat <- dat[keep, , drop = FALSE]

write.table(x = dat, file = output, sep = "\t", quote = FALSE,
            row.names = FALSE, col.names = TRUE)