comparison extract_duplicates.r @ 2:1f1640608245 draft default tip

Uploaded
author davidvanzessen
date Tue, 01 Sep 2015 08:34:27 -0400
parents a3c4e3e62e10
children
comparison
Legend: equal | deleted | inserted | replaced
Left column: revision 1:a3c4e3e62e10 (parent) | Right column: revision 2:1f1640608245 (this changeset)
1 args <- commandArgs(trailingOnly = TRUE) 1 args <- commandArgs(trailingOnly = TRUE)
2 2
3 input=args[1] 3 input=args[1]
4 column=as.numeric(args[2]) 4 column=as.numeric(args[2])
5 header=(args[3] == "yes") 5 header=(args[3] == "yes")
6 out_file=args[4] 6 regex_filter=args[4]
7 out_file=args[5]
8
9 print(regex_filter)
7 10
8 dat = read.table(input, header=header, sep="\t", fill=T, stringsAsFactors=F, quote="") 11 dat = read.table(input, header=header, sep="\t", fill=T, stringsAsFactors=F, quote="")
9 12
10 duplicates = dat[duplicated(dat[,column]),column] 13 dat.names = names(dat)
11 14
12 dat = dat[dat[,column] %in% duplicates,] 15 dat$filtered = gsub("\\(.*", "", dat[,column])
13 16
14 dat = dat[order(dat[,column]),] 17 duplicates = dat[duplicated(dat$filtered),"filtered"]
15 18
16 write.table(dat, out_file, sep="\t", row.names=F, col.names=header, quote=F) 19 dat = dat[dat[,"filtered"] %in% duplicates,]
20
21 dat = dat[order(dat[,"filtered"]),]
22
23 write.table(dat[,dat.names], out_file, sep="\t", row.names=F, col.names=header, quote=F)