annotate extract_duplicates.r @ 2:1f1640608245 draft default tip

Uploaded
author davidvanzessen
date Tue, 01 Sep 2015 08:34:27 -0400
parents a3c4e3e62e10
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
02cf2dd19564 Uploaded
davidvanzessen
parents:
diff changeset
# Extract the rows of a tab-separated file whose key occurs more than once.
#
# Usage:
#   Rscript extract_duplicates.r <input> <column> <header> <regex_filter> \
#     <out_file>
#
#   input         path of the tab-separated input file
#   column        1-based index of the column the key is derived from
#   header        "yes" when the first line of the input holds column names
#   regex_filter  regular expression; every match is removed from the column
#                 value before values are compared (e.g. "\(.*" compares only
#                 the text in front of the first opening parenthesis)
#   out_file      path the duplicated rows are written to, tab-separated

#' Select all rows whose key occurs more than once
#'
#' The key of a row is the value found in `column` with every match of
#' `pattern` removed. Every row whose key is shared with at least one other
#' row is returned, ordered by key.
#'
#' The key is kept in a separate vector instead of a helper column, so an
#' input column that happens to be called "filtered" is returned untouched
#' and a one-column input keeps its data frame shape and column name.
#'
#' @param dat A data frame.
#' @param column 1-based index of the column the key is derived from.
#' @param pattern Regular expression passed to `gsub()`; matches are removed
#'   from the column value to build the key.
#' @return A data frame with the columns of `dat` holding only the rows that
#'   have a duplicated key, ordered by key.
extract_duplicates <- function(dat, column, pattern = "\\(.*") {
  stopifnot(is.data.frame(dat))
  if (length(column) != 1L || is.na(column) ||
        column < 1 || column > ncol(dat)) {
    stop("'column' must be a single index between 1 and ", ncol(dat),
         call. = FALSE)
  }

  key <- gsub(pattern, "", dat[[column]])
  # duplicated() only flags the second and later occurrences, so look the
  # flagged keys up again to also catch the first row of every group.
  is_dup <- key %in% key[duplicated(key)]

  dup_rows <- dat[is_dup, , drop = FALSE]
  dup_rows[order(key[is_dup]), , drop = FALSE]
}

#' Command line entry point
#'
#' Reads the input table, extracts the rows with a duplicated key and writes
#' them to the output file. The header line is written only when the input
#' was read with one.
#'
#' @param args Character vector of the five command line arguments described
#'   at the top of this file.
#' @return The extracted rows, invisibly.
main <- function(args) {
  if (length(args) < 5L) {
    stop("expected 5 arguments: <input> <column> <header> <regex_filter> ",
         "<out_file>", call. = FALSE)
  }

  input <- args[1]
  column <- suppressWarnings(as.integer(args[2]))
  header <- (args[3] == "yes")
  regex_filter <- args[4]
  out_file <- args[5]

  if (is.na(column)) {
    stop("<column> must be a whole number, got '", args[2], "'",
         call. = FALSE)
  }

  print(regex_filter)

  dat <- read.table(input, header = header, sep = "\t", fill = TRUE,
                    stringsAsFactors = FALSE, quote = "")

  # The filter supplied on the command line is applied here; it used to be
  # read and printed but ignored in favour of a hard-coded "\\(.*".
  result <- extract_duplicates(dat, column, regex_filter)

  write.table(result, out_file, sep = "\t", row.names = FALSE,
              col.names = header, quote = FALSE)
  invisible(result)
}

args <- commandArgs(trailingOnly = TRUE)

if (length(args) == 0L) {
  # Nothing to do without arguments; this also lets the file be sourced to
  # get at the functions above.
  message("Usage: Rscript extract_duplicates.r <input> <column> ",
          "<header: yes|no> <regex_filter> <out_file>")
} else {
  main(args)
}