Mercurial > repos > davidvanzessen > extract_duplicates
view extract_duplicates.r @ 2:1f1640608245 draft default tip
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 01 Sep 2015 08:34:27 -0400 |
parents | a3c4e3e62e10 |
children |
line wrap: on
line source
# Extract the rows of a tab-separated table whose key column value occurs
# more than once, and write them out sorted by that key.
#
# Command-line arguments (positional):
#   1. input        path of the tab-separated input file
#   2. column       1-based index of the column to look for duplicates in
#   3. header       "yes" if the first line of the input is a header row
#   4. regex_filter printed for logging only (see NOTE below)
#   5. out_file     path of the tab-separated output file
#
# The comparison key is the column value with everything from the first "("
# onwards removed, so "abc(1)" and "abc(2)" count as duplicates of each other.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 5) {
  stop(
    "Usage: extract_duplicates.r <input> <column> <header yes/no> ",
    "<regex_filter> <out_file>",
    call. = FALSE
  )
}

input <- args[1]
column <- as.numeric(args[2])
header <- (args[3] == "yes")
regex_filter <- args[4]
out_file <- args[5]

# NOTE(review): regex_filter is only echoed; the pattern actually applied
# below is the hard-coded "\\(.*". Confirm whether it was meant to be used.
print(regex_filter)

# check.names = FALSE keeps the header names exactly as they appear in the
# input, so the header written to out_file matches the original one.
dat <- read.table(
  input,
  header = header,
  sep = "\t",
  fill = TRUE,
  stringsAsFactors = FALSE,
  quote = "",
  check.names = FALSE
)

if (is.na(column) || column < 1 || column > ncol(dat)) {
  stop(
    "column must be an index between 1 and ", ncol(dat), ", got: ", args[2],
    call. = FALSE
  )
}

# Keep the key in a separate vector instead of adding a helper column to dat,
# so an input column that happens to be called "filtered" is never overwritten.
key <- gsub("\\(.*", "", dat[[column]])

# A row is kept when its key appears on at least one other row.
is_dup <- duplicated(key) | duplicated(key, fromLast = TRUE)
rows <- which(is_dup)
rows <- rows[order(key[rows])]

# drop = FALSE keeps a single-column table a data frame, so its column name
# survives into the output header.
result <- dat[rows, , drop = FALSE]

write.table(
  result,
  out_file,
  sep = "\t",
  row.names = FALSE,
  col.names = header,
  quote = FALSE
)