Mercurial > repos > davidvanzessen > extract_duplicates
changeset 0:02cf2dd19564 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 21 Aug 2015 10:49:14 -0400 |
parents | |
children | a3c4e3e62e10 |
files | extract_duplicates.r extract_duplicates.sh extract_duplicates.xml |
diffstat | 3 files changed, 39 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# extract_duplicates.r
#
# Keep every row of a tab-separated file whose value in the chosen column
# occurs more than once, sort those rows by that column, and write the result
# as a tab-separated file.
#
# Usage: Rscript extract_duplicates.r <input> <column> <header> <out_file>
#   input     path of the tab-separated input file
#   column    1-based index of the column checked for duplicated values
#   header    "yes" when the first input line is a header; any other value
#             means the file has no header
#   out_file  path the filtered table is written to
args <- commandArgs(trailingOnly = TRUE)

if (length(args) < 4) {
  stop(
    "usage: extract_duplicates.r <input> <column> <header> <out_file>",
    call. = FALSE
  )
}

input <- args[1]
column <- as.numeric(args[2])
header <- (args[3] == "yes")
out_file <- args[4]

if (is.na(column) || column < 1) {
  stop("column must be a positive column index, got: ", args[2], call. = FALSE)
}

# quote = "" keeps ' and " as ordinary characters; the output below is written
# unquoted, so quote-aware parsing on input would alter or mis-split fields.
# check.names = FALSE keeps the header names exactly as they are in the input
# instead of rewriting them into syntactic R names.
dat <- read.table(
  input,
  header = header,
  sep = "\t",
  fill = TRUE,
  quote = "",
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# duplicated() flags the second and later occurrences, which is enough to
# collect every value that appears at least twice.
duplicates <- dat[duplicated(dat[, column]), column]

# drop = FALSE keeps the result a data frame even when the input has a single
# column; without it the row subset collapses to a vector and the next
# dat[, column] lookup fails.
dat <- dat[dat[, column] %in% duplicates, , drop = FALSE]

dat <- dat[order(dat[, column]), , drop = FALSE]

# The header row is written only when the input had one.
write.table(
  dat,
  out_file,
  sep = "\t",
  row.names = FALSE,
  col.names = header,
  quote = FALSE
)
# extract_duplicates.sh
#
# Wrapper that runs extract_duplicates.r (located next to this script) with
# the four positional arguments it was given.
#
# Usage: extract_duplicates.sh <input> <column> <header> <out_file>
input=$1
column=$2
header=$3
out_file=$4

# Resolve the directory holding this script so the R script is found
# regardless of the caller's working directory.
dir="$(cd "$(dirname "$0")" && pwd)"

# Every expansion is quoted so paths containing spaces or glob characters are
# passed to Rscript as single, unmodified arguments. stderr is merged into
# stdout, as before.
Rscript --verbose "$dir/extract_duplicates.r" "${input}" "${column}" "${header}" "${out_file}" 2>&1
<tool id="extract_duplicates" name="Extract Duplicates" version="1.1.0">
	<description>to a new dataset</description>
	<!-- Parameters are quoted so that values containing whitespace reach the
	     wrapper script as single arguments. -->
	<command interpreter="bash">
		extract_duplicates.sh "$input" "$column" "$header" "$out_file"
	</command>
	<inputs>
		<param format="tabular" name="input" type="data" label="Input"/>
		<param name="column" label="on column" type="data_column" data_ref="input" accept_default="true" />
		<param name="header" type="boolean" checked="False" truevalue="yes" falsevalue="no" label="Input file has a header?" help="if checked, the first line of the input will be treated as a header"/>
	</inputs>
	<outputs>
		<data format="input" name="out_file" metadata_source="input"/>
	</outputs>
	<help>
Extracts every row of the tab-separated input whose value in the selected
column occurs more than once. The extracted rows are sorted by that column and
written to a new dataset. If the input is marked as having a header, the
header line is kept as the first line of the output.
	</help>
</tool>