Mercurial > repos > bgruening > text_processing

<tool id="tp_sorted_uniq" name="Unique" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
  <!-- Short description that completes the tool name ("Unique"). -->
  <description>occurrences of each record</description>
    <!-- Shared definitions are imported from macros.xml. The @TOKEN@ placeholders on the
         tool element and the macros expanded in this file are presumably defined there
         (macros.xml is not visible from this file; confirm against it). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="creator"/>
    <!-- sed is handed to the shared "requirements" macro as an extra package; the command
         template calls sed to copy header lines through before sorting. -->
    <expand macro="requirements">
        <requirement type="package" version="4.9">sed</requirement>
    </expand>
    <!-- The tool version string is the first line printed by sort's version banner. -->
    <version_command>sort --version | head -n 1</version_command>
    <!--
        Command template (Cheetah rendered to a shell command line).

        Everything runs in one subshell whose stdin is the input dataset and whose stdout
        is the output dataset, with LC_ALL=C exported for both programs.
          1. When the header count is positive, sed prints that many leading lines and
             quits. It runs unbuffered so that the rest of the stream is still available
             to the next command.
          2. sort -u de-duplicates what remains, using a tab as field separator, with the
             optional case-folding and numeric flags, and an optional key restricted to
             the chosen start/end columns (advanced mode only).
    -->
    <command>
<![CDATA[
    (
        export LC_ALL=C;
        #if int($header) > 0
            sed -u '${header}q' &&
        #end if
        sort -u ${ignore_case} ${is_numeric} -t '	'
        #if $adv_opts.adv_opts_selector == 'advanced'
            -k${adv_opts.column_start},${adv_opts.column_end}
        #end if
    ) < '${infile}' > '${outfile}'
]]>
    </command>
    <!-- User-facing parameters. Their names are referenced by the command template. -->
    <inputs>
        <!-- Dataset to de-duplicate; it is fed to the command on stdin. -->
        <param name="infile"
               type="data"
               format="tabular"
               label="File to scan for unique values" />

        <!-- Each checkbox expands to its sort flag when checked and to an empty string otherwise. -->
        <param name="ignore_case"
               type="boolean"
               truevalue="-f"
               falsevalue=""
               label="Ignore differences in case when comparing"
               help="(-f)" />
        <param name="is_numeric"
               type="boolean"
               truevalue="-n"
               falsevalue=""
               label="Compare numeric values at start of records"
               help="This will try to detect numeric values at the start of each record and base comparisons only on these numbers (or the empty string if no starting number is found) (-n)." />

        <!-- Number of leading lines copied to the output unchanged before sorting starts. -->
        <param name="header"
               type="integer"
               value="0"
               label="Number of header lines"
               help="These will be ignored during sort.">
            <validator type="in_range"
                       message="Negative values are not allowed."
                       min="0" />
        </param>

        <!-- Advanced mode exposes the column range that is used as the sort key. -->
        <conditional name="adv_opts">
            <param name="adv_opts_selector"
                   type="select"
                   label="Advanced Options">
                <option value="basic" selected="true">Hide Advanced Options</option>
                <option value="advanced">Show Advanced Options</option>
            </param>
            <when value="basic" />
            <when value="advanced">
                <param name="column_start"
                       type="data_column"
                       data_ref="infile"
                       label="Column start"
                       help="Unique on specific column range" />
                <param name="column_end"
                       type="data_column"
                       data_ref="infile"
                       label="Column end"
                       help="Unique on specific column range" />
            </when>
        </conditional>
    </inputs>
    <!-- Single result dataset; its datatype and metadata are taken from the input dataset. -->
    <outputs>
        <data name="outfile"
              format_source="infile"
              metadata_source="infile" />
    </outputs>
    <!-- Functional tests; each compares the produced dataset with a stored expected file. -->
    <tests>
        <!-- Advanced mode: key on columns 2 to 3, numeric and case-insensitive, default header count. -->
        <test>
            <param name="infile" value="1.bed" />
            <param name="is_numeric" value="true" />
            <param name="ignore_case" value="true" />
            <conditional name="adv_opts">
                <param name="adv_opts_selector" value="advanced" />
                <param name="column_start" value="2" />
                <param name="column_end" value="3" />
            </conditional>
            <output name="outfile" file="unique_results1.bed" />
        </test>

        <!-- Same options as the first test, with one header line passed through before sorting. -->
        <test>
            <param name="infile" value="1.bed" />
            <param name="is_numeric" value="true" />
            <param name="ignore_case" value="true" />
            <param name="header" value="1" />
            <conditional name="adv_opts">
                <param name="adv_opts_selector" value="advanced" />
                <param name="column_start" value="2" />
                <param name="column_end" value="3" />
            </conditional>
            <output name="outfile" file="unique_results2.bed" />
        </test>

        <!-- Basic mode (no key columns): case-insensitive, non-numeric comparison on 1_dup.bed. -->
        <test>
            <param name="infile" value="1_dup.bed" />
            <param name="is_numeric" value="false" />
            <param name="ignore_case" value="true" />
            <output name="outfile" file="unique_results3.bed" />
        </test>
    </tests>
    <help>
<![CDATA[
.. class:: infomark

**Syntax**

This tool returns all unique lines using the 'sort -u' command. It can be used with unsorted files.
If you need additional options, like grouping or counting your unique results, please use the 'Unique lines from sorted file' tool.

-----

.. class:: infomark

The input file needs to be tab-separated. Please convert your file if necessary.

-----

**Example**

- Input file::

       chr1   10  100  gene1
       chr1  105  200  gene2
       chr1   10  100  gene1
       chr2   10  100  gene4
       chr2 1000 1900  gene5
       chr3   15 1656  gene6
       chr2   10  100  gene4

- Unique lines will result in::

       chr1   10  100  gene1
       chr1  105  200  gene2
       chr2   10  100  gene4
       chr2 1000 1900  gene5
       chr3   15 1656  gene6

]]>
</help>
<!-- Citation entries are supplied by the shared "citations" macro. -->
<expand macro="citations" />
</tool>
author	bgruening
date	Sat, 17 Jan 2026 00:56:56 +0000
parents	08cdbfffce67
children