annotate new_imgt.r @ 127:afb0937ec0dc draft default tip

Uploaded
author davidvanzessen
date Tue, 13 Sep 2016 08:55:05 -0400
parents e7b550d52eb7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
114
e7b550d52eb7 Uploaded
davidvanzessen
parents:
diff changeset
# new_imgt.r
#
# Restrict every tab-separated ".txt" table in a directory to the sequences
# that are present in a "merged" table, rewriting each file in place.
#
# Usage: Rscript new_imgt.r <imgt.dir> <merged.file> <gene>
#   imgt.dir    - directory holding the ".txt" tables to filter
#   merged.file - tab-separated file with (at least) the columns
#                 "Sequence ID" and "best_match"
#   gene        - regular-expression prefix that "best_match" must start with,
#                 or "-" to accept every match
args <- commandArgs(trailingOnly = TRUE)

# Fail with a readable message instead of an NA-related error further down.
if (length(args) < 3) {
  stop("usage: new_imgt.r <imgt.dir> <merged.file> <gene>", call. = FALSE)
}

imgt.dir <- args[1]
merged.file <- args[2]
gene <- args[3]

# check.names is left at its default here, so the "Sequence ID" header is
# read as the column name "Sequence.ID".
merged <- read.table(merged.file, header = TRUE, sep = "\t", fill = TRUE,
                     stringsAsFactors = FALSE)

# Rows whose best_match mentions "unmatched" are always discarded; when a gene
# is given, additionally keep only the rows whose best_match starts with it.
merged <- merged[!grepl("unmatched", merged$best_match), ]
if (gene != "-") {
  merged <- merged[grepl(paste0("^", gene), merged$best_match), ]
}

# "\\.txt$" is a real regular expression for the ".txt" extension (the pattern
# argument of list.files is a regex, not a glob).
for (f in list.files(imgt.dir, pattern = "\\.txt$")) {
  # file.path inserts the separator, so imgt.dir works with or without a
  # trailing slash.
  path <- file.path(imgt.dir, f)

  # check.names = FALSE keeps the original header, hence "Sequence ID" here.
  dat <- read.table(path, header = TRUE, sep = "\t", fill = TRUE, quote = "",
                    stringsAsFactors = FALSE, check.names = FALSE)

  dat <- dat[dat[, "Sequence ID"] %in% merged$Sequence.ID, ]

  # Change the FR1 columns to 0 in the "8_..." file.
  if (nrow(dat) > 0 && grepl("^8_", f)) {
    dat[, grepl("^FR1", names(dat))] <- 0
  }

  # Overwrite the input file with the filtered table.
  write.table(dat, path, quote = FALSE, sep = "\t", row.names = FALSE,
              col.names = TRUE, na = "")
}