view extract_duplicates.r @ 2:1f1640608245 draft default tip

Uploaded
author davidvanzessen
date Tue, 01 Sep 2015 08:34:27 -0400
parents a3c4e3e62e10
children
line wrap: on
line source

# Extract every row of a tab-separated file whose key column value occurs
# more than once, and write those rows, sorted by key, to a new file.
#
# The key of a row is the value of the chosen column with everything from
# the first "(" onwards removed, so "abc(1)" and "abc(2)" share key "abc".
#
# Command-line arguments (positional):
#   1. input        path of the tab-separated input file
#   2. column       1-based index of the column to build the key from
#   3. header       "yes" if the first line of the input is a header row
#   4. regex_filter printed to stdout only (see note below)
#   5. out_file     path of the tab-separated output file
args <- commandArgs(trailingOnly = TRUE)

if (length(args) < 5) {
  stop(
    "usage: extract_duplicates.r <input> <column> <header: yes|no> ",
    "<regex_filter> <out_file>",
    call. = FALSE
  )
}

input <- args[1]
column <- as.integer(args[2])
# identical() never yields NA, unlike `args[3] == "yes"` on a missing value.
header <- identical(args[3], "yes")
regex_filter <- args[4]
out_file <- args[5]

# NOTE(review): regex_filter is only echoed; the pattern applied below is
# hard-coded. Confirm whether the argument was meant to replace it.
print(regex_filter)

# check.names = FALSE keeps header names exactly as they appear in the input
# so the header written to out_file matches the original one.
# NOTE(review): comment.char is left at its default ("#"), so any text after
# a "#" in a line is dropped on read -- confirm that this is intended.
dat <- read.table(
  input,
  header = header,
  sep = "\t",
  fill = TRUE,
  stringsAsFactors = FALSE,
  quote = "",
  check.names = FALSE
)

if (is.na(column) || column < 1 || column > ncol(dat)) {
  stop(
    "column must be an integer between 1 and ", ncol(dat),
    " (got '", args[2], "')",
    call. = FALSE
  )
}

# Keep the key in its own vector rather than as an extra data-frame column,
# so an input column that happens to be called "filtered" is not clobbered.
key <- gsub("\\(.*", "", dat[[column]])

# TRUE for every member of a group of equal keys, including the first one.
is_dup <- duplicated(key) | duplicated(key, fromLast = TRUE)

# drop = FALSE keeps a data frame even when the input has a single column.
dup_rows <- dat[is_dup, , drop = FALSE]
dup_rows <- dup_rows[order(key[is_dup]), , drop = FALSE]

write.table(
  dup_rows,
  out_file,
  sep = "\t",
  row.names = FALSE,
  col.names = header,
  quote = FALSE
)