Mercurial > repos > bgruening > text_processing

<tool id="unixtools_uniq_tool" name="Unique lines">
    <description>assuming sorted input file</description>
    <requirements>
        <!-- uniq is provided by GNU coreutils; sed is only used to reformat the
             output when occurrence counting is enabled. -->
        <requirement type="package" version="8.21">gnu_coreutils</requirement>
        <requirement type="package" version="4.2.2-sandbox">gnu_sed</requirement>
    </requirements>
    <command>
        ## Run uniq on the (already sorted) input and write the result to the output dataset.
        ## NOTE: only a double hash starts a Cheetah comment. A single hash followed by
        ## plain text is emitted into the command line, where the shell treats the rest
        ## of the (joined) line as a comment and silently drops the pipe and redirect.
        uniq
            -f
            $skipfields
            $count
            $repeated
            $ignorecase
            $uniqueonly
            '$input'

            ## feature is not yet released, it will be in the next 8.22 version
            ##--group=$group

            #if $count:
                ## With counting enabled, uniq prints the count padded with spaces in
                ## front of the line and a space (not a tab) after the number. Strip
                ## the leading padding and turn the first remaining space into a tab
                ## so the count becomes a proper first column.
                | sed -e 's/^ *//' -e 's/ /\t/'
            #end if
            ## Single redirect shared by both the plain and the counting pipeline.
            &gt; '$output'
    </command>

    <inputs>
        <!-- Dataset to scan; uniq only collapses adjacent lines, so it must be sorted. -->
        <param format="txt,tabular" name="input" type="data" label="File to scan for unique values" help="Make sure you have sorted this file" />

        <!-- Each boolean maps directly onto one uniq flag; an unchecked box contributes
             an empty string to the command line. -->
        <param name="count" type="boolean" label="Counting number of occurrences [-c]" help="Prefix lines by the number of occurrences" truevalue="-c" falsevalue="" />
        <param name="repeated" type="boolean" label="Only print duplicate lines [-d]" truevalue="-d" falsevalue="" />
        <param name="ignorecase" type="boolean" label="Ignore differences in case when comparing [-i]" truevalue="-i" falsevalue="" />
        <param name="uniqueonly" type="boolean" label="Only print unique lines [-u]" checked="True" truevalue="-u" falsevalue="" />
        <!-- The value is passed verbatim after -f; min="0" rejects negative numbers in
             the form instead of letting uniq fail at run time. -->
        <param name="skipfields" type="integer" label="Avoid comparing the first N fields [-f]" help="Use zero to start from the first field" size="2" value="0" min="0" />

        <!--
        <param name="group" type="select" label="Output all lines, and delimit each unique group.">
            <option value="separate">Separate unique groups with a single delimiter</option>
            <option value="prepend">Output a delimiter before each group of unique items</option>
            <option value="append">Output a delimiter after each group of unique items.</option>
            <option value="both">Output a delimiter around each group of unique items.</option>
        </param>
        -->
    </inputs>

    <outputs>
        <!-- The result keeps the datatype of the input dataset. format_source names the
             input explicitly; the special value format="input" is deprecated.
             NOTE(review): metadata is also copied from the input, although the counting
             option adds a leading column to the output. Confirm this is intended. -->
        <data format_source="input" name="output" metadata_source="input"/>
    </outputs>
    <help>
This tool takes a sorted file and looks for lines that are unique.

.. class:: warningmark

Please make sure your file is sorted, or else this tool will give you an erroneous output.

.. class:: infomark

You can sort your file using either the "Sort" tool in "Filter and Sort", or the "Sort" tool in "Unix Tools".

    </help>
</tool>
author	bgruening
date	Thu, 05 Sep 2013 12:42:48 -0400
parents	a4ad586d1403
children	7068d1548234