diff extract_duplicates.r @ 0:02cf2dd19564 draft

Uploaded
author davidvanzessen
date Fri, 21 Aug 2015 10:49:14 -0400
parents
children a3c4e3e62e10
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/extract_duplicates.r	Fri Aug 21 10:49:14 2015 -0400
@@ -0,0 +1,40 @@
+# Keep only the rows whose value in one column occurs more than once.
+#
+# Usage: Rscript extract_duplicates.r <input> <column> <header> <out_file>
+#   input    tab-separated file to read
+#   column   1-based index of the column checked for repeated values
+#   header   "yes" when the first line holds column names
+#   out_file path of the tab-separated result, sorted by that column
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 4) {
+  stop("expected 4 arguments: input, column, header (yes/no), out_file",
+       call. = FALSE)
+}
+
+input <- args[1]
+column <- suppressWarnings(as.integer(args[2]))
+header <- (args[3] == "yes")
+out_file <- args[4]
+
+# NOTE(review): read.table's default quote and comment.char handling still
+# applies here; confirm inputs never contain quotes or '#' inside fields.
+dat <- read.table(input, header = header, sep = "\t", fill = TRUE,
+                  stringsAsFactors = FALSE)
+
+# Fail with a clear message instead of an obscure subscript error.
+if (is.na(column) || column < 1 || column > ncol(dat)) {
+  stop("column must be a number between 1 and ", ncol(dat), call. = FALSE)
+}
+
+# Values that appear at least twice in the chosen column.
+duplicates <- dat[duplicated(dat[, column]), column]
+
+# drop = FALSE keeps a data frame even when the input has a single column;
+# without it the subset collapses to a vector and the next step fails.
+dat <- dat[dat[, column] %in% duplicates, , drop = FALSE]
+
+# Sort so rows sharing a value end up next to each other.
+dat <- dat[order(dat[, column]), , drop = FALSE]
+
+write.table(dat, out_file, sep = "\t", row.names = FALSE, col.names = header,
+            quote = FALSE)