Mercurial > repos > davidvanzessen > extract_duplicates
comparison extract_duplicates.r @ 2:1f1640608245 draft default tip
Uploaded
| author | davidvanzessen |
|---|---|
| date | Tue, 01 Sep 2015 08:34:27 -0400 |
| parents | a3c4e3e62e10 |
| children |
comparison
equal
deleted
inserted
replaced
| 1:a3c4e3e62e10 | 2:1f1640608245 |
|---|---|
| 1 args <- commandArgs(trailingOnly = TRUE) | 1 args <- commandArgs(trailingOnly = TRUE) |
| 2 | 2 |
| 3 input=args[1] | 3 input=args[1] |
| 4 column=as.numeric(args[2]) | 4 column=as.numeric(args[2]) |
| 5 header=(args[3] == "yes") | 5 header=(args[3] == "yes") |
| 6 out_file=args[4] | 6 regex_filter=args[4] |
| | 7 out_file=args[5] |
| | 8 |
| | 9 print(regex_filter) |
| 7 | 10 |
| 8 dat = read.table(input, header=header, sep="\t", fill=T, stringsAsFactors=F, quote="") | 11 dat = read.table(input, header=header, sep="\t", fill=T, stringsAsFactors=F, quote="") |
| 9 | 12 |
| 10 duplicates = dat[duplicated(dat[,column]),column] | 13 dat.names = names(dat) |
| 11 | 14 |
| 12 dat = dat[dat[,column] %in% duplicates,] | 15 dat$filtered = gsub("\\(.*", "", dat[,column]) |
| 13 | 16 |
| 14 dat = dat[order(dat[,column]),] | 17 duplicates = dat[duplicated(dat$filtered),"filtered"] |
| 15 | 18 |
| 16 write.table(dat, out_file, sep="\t", row.names=F, col.names=header, quote=F) | 19 dat = dat[dat[,"filtered"] %in% duplicates,] |
| | 20 |
| | 21 dat = dat[order(dat[,"filtered"]),] |
| | 22 |
| | 23 write.table(dat[,dat.names], out_file, sep="\t", row.names=F, col.names=header, quote=F) |
