changeset 30:5907d248dee3 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 28d2fcf2649b999762fbd94bd648485b916f2f0d
author bgruening
date Sat, 17 Jan 2026 00:56:56 +0000
parents 4f7cade041cb
children
files grep.xml macros.xml replace_text_in_line.xml sed.xml sort.xml sorted_uniq.xml test-data/1_dup.bed test-data/sort3.tabular test-data/sort4.tabular test-data/sorted3.tabular test-data/sorted4.tabular test-data/sorted4_partial.tabular test-data/unique_results2.bed test-data/unique_results3.bed unsorted_uniq.xml
diffstat 15 files changed, 404 insertions(+), 36 deletions(-) [+]
line wrap: on
line diff
--- a/grep.xml	Wed Jun 04 15:11:51 2025 +0000
+++ b/grep.xml	Sat Jan 17 00:56:56 2026 +0000
@@ -6,7 +6,7 @@
     <expand macro="creator"/>
     <requirements>
         <requirement type="package" version="3.11">grep</requirement>
-        <requirement type="package" version="4.8">sed</requirement><!-- for ansi2html.sh -->
+        <requirement type="package" version="4.9">sed</requirement><!-- for ansi2html.sh -->
     </requirements>
     <stdio>
         <exit_code range="2:" level="fatal" description="grep failed" />
--- a/macros.xml	Wed Jun 04 15:11:51 2025 +0000
+++ b/macros.xml	Sat Jan 17 00:56:56 2026 +0000
@@ -6,7 +6,7 @@
         </requirements>
     </xml>
     <token name="@TOOL_VERSION@">9.5</token>
-    <token name="@VERSION_SUFFIX@">2</token>
+    <token name="@VERSION_SUFFIX@">3</token>
     <token name="@PROFILE@">23.1</token>
     <xml name="stdio">
         <stdio>
--- a/replace_text_in_line.xml	Wed Jun 04 15:11:51 2025 +0000
+++ b/replace_text_in_line.xml	Sat Jan 17 00:56:56 2026 +0000
@@ -5,7 +5,7 @@
     </macros>
     <expand macro="creator"/>
     <requirements>
-        <requirement type="package" version="4.8">sed</requirement>
+        <requirement type="package" version="4.9">sed</requirement>
     </requirements>
     <version_command>sed --version | head -n 1</version_command>
     <command>
@@ -26,7 +26,7 @@
     <inputs>
       <param format="txt" name="infile" type="data" label="File to process" />
       <repeat name="replacements" title="Replacement" min="1">
-         <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " >
+         <param name="find_pattern" type="text" label="Find pattern" help="Use simple text, or a valid regular expression">
             <sanitizer>
                 <valid initial="string.printable">
                     <remove value="&#39;"/>
@@ -38,7 +38,7 @@
                 </mapping>
             </sanitizer>
          </param>
-         <param name="replace_pattern" type="text" size="20" label="Replace with:" help="Use simple text, or &amp; (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." >
+         <param name="replace_pattern" type="text" label="Replace with:" help="Use simple text, or &amp; (ampersand) and \1, \2, \3, etc. to refer to matched text. See examples below." >
             <sanitizer>
                 <valid initial="string.printable">
                     <remove value="&#39;"/>
@@ -50,7 +50,7 @@
                 </mapping>
             </sanitizer>
          </param>
-         <param name="sed_options" type="text" size="20" optional="true" label="Additional sed commands before replacement" help="Provide additional sed commands before the replacement (e.g., ':a;N;$!ba;')." >
+         <param name="sed_options" type="text" optional="true" label="Additional sed commands before replacement" help="Provide additional sed commands before the replacement (e.g., ':a;N;$!ba;')." >
             <sanitizer>
               <valid initial="string.printable">
                   <remove value="&#39;"/>  <!-- Removes single quotes -->
--- a/sed.xml	Wed Jun 04 15:11:51 2025 +0000
+++ b/sed.xml	Sat Jan 17 00:56:56 2026 +0000
@@ -5,7 +5,7 @@
     </macros>
     <expand macro="creator"/>
     <requirements>
-        <requirement type="package" version="4.8">sed</requirement>
+        <requirement type="package" version="4.9">sed</requirement>
     </requirements>
     <version_command>sed --version | head -n 1</version_command>
     <command>
@@ -88,7 +88,7 @@
 
 - Short sed tutorial (http://www.linuxhowtos.org/System/sed_tutorial.htm)
 - Long sed tutorial (http://www.grymoire.com/Unix/Sed.html)
-- sed faq with good examples (http://sed.sourceforge.net/sedfaq.html)
+- sed faq with good examples (https://www.pement.org/sed/sedfaq.html)
 - sed cheat-sheet (http://www.catonmat.net/download/sed.stream.editor.cheat.sheet.pdf)
 
 -----
--- a/sort.xml	Wed Jun 04 15:11:51 2025 +0000
+++ b/sort.xml	Sat Jan 17 00:56:56 2026 +0000
@@ -5,7 +5,7 @@
     </macros>
     <expand macro="creator"/>
     <expand macro="requirements">
-        <requirement type="package" version="4.8">sed</requirement>
+        <requirement type="package" version="4.9">sed</requirement>
     </expand>
     <version_command>sort --version | head -n 1</version_command>
     <command>
@@ -16,10 +16,18 @@
                 sed -u '${header}'q &&
             #end if
 
-            sort $unique $ignore_case --stable -t '	'
+            sort $unique --stable -t '	'
 
             #for $key in $sortkeys:
-                -k '${key.column}${key.order}${key.style},${key.column}'
+                #if $key.start_charpos and $key.end_charpos:
+                    -k ${key.column}.${key.start_charpos}${key.ignore_leading_blanks},${key.column}.${key.end_charpos}${key.ignore_leading_blanks}${key.order}${key.style}${key.ignore_case}
+                #elif $key.start_charpos:
+                    -k ${key.column}.${key.start_charpos}${key.ignore_leading_blanks},${key.column}${key.order}${key.style}${key.ignore_case}
+                #elif $key.end_charpos:
+                    -k ${key.column}${key.ignore_leading_blanks},${key.column}.${key.end_charpos}${key.ignore_leading_blanks}${key.order}${key.style}${key.ignore_case}
+                #else:
+                    -k ${key.column}${key.ignore_leading_blanks},${key.column}${key.order}${key.style}${key.ignore_case}
+                #end if
             #end for
 
         ) < '${infile}' > '${outfile}'
@@ -28,17 +36,19 @@
     <inputs>
         <param format="tabular" name="infile" type="data" label="Sort Query" />
         <param name="header" type="integer" value="0"
-            label="Number of header lines" help="These will be ignored during sort.">
+            label="Number of header lines" help="Header lines will be copied to the output unchanged without operating on them.">
             <validator type="in_range" message="Negative values are not allowed." min="0"/>
         </param>
-
+        
         <repeat name="sortkeys" title="Column selections" min="1">
-            <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" />
+            <param name="column" label="Sort on column" type="data_column" data_ref="infile" accept_default="true" />
+            <param name="start_charpos" label="considering its characters from" type="integer" min="1" optional="true" help="Leave empty (or set to 1) to use the column value starting from its first character." />
+            <param name="end_charpos" label="to and including" type="integer" min="1" optional="true" help="Leave empty to use the column value up to and including its last character." />
             <param name="order" type="select" display="radio" label="in">
                 <option value="">Ascending order</option>
                 <option value="r">Descending order</option>
             </param>
-            <param name="style" type="select" display="radio" label="Flavor">
+            <param name="style" type="select" display="radio" label="using sort flavor">
                 <option value="n">Fast numeric sort (-n)</option>
                 <option value="g">General numeric sort ( scientific notation -g)</option>
                 <option value="V">Natural/Version sort (-V) </option>
@@ -46,18 +56,20 @@
                 <option value="h">Human-readable numbers (-h)</option>
                 <option value="R">Random order (-R)</option>
             </param>
+            <param name="ignore_case" type="boolean" checked="false" truevalue="f" falsevalue=""
+                   label="ignoring case" help="Turn lowercase symbols to upper case before comparing values in this column. (-f)" />
+            <param name="ignore_leading_blanks" type="boolean" checked="false" truevalue="b" falsevalue=""
+                   label="ignoring leading blanks" help="This option can be useful with Alphabetical and Natural sort (which treat spaces as actual characters) or to prevent unwanted offsets if you specified a range of character positions to consider." />
         </repeat>
 
         <param name="unique" type="boolean" checked="false" truevalue="--unique" falsevalue=""
             label="Output unique values" help="Print only unique values, based on sorted key columns. See help section for details. (--unique)" />
-        <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue=""
-            label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters. (-i)" />
     </inputs>
     <outputs>
         <data name="outfile" format_source="infile" metadata_source="infile"/>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="1">
             <param name="infile" value="sort1.bed"/>
             <param name="header" value="3"/>
             <repeat name="sortkeys">
@@ -72,7 +84,7 @@
             </repeat>
             <output name="outfile" file="sort_result1.bed"/>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="infile" value="sort1.bed"/>
             <param name="header" value="3"/>
             <repeat name="sortkeys">
@@ -87,7 +99,7 @@
             </repeat>
             <output name="outfile" file="sort_result2.bed"/>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="infile" value="sort2.bed"/>
             <repeat name="sortkeys">
                 <param name="column" value="5"/>
@@ -96,6 +108,80 @@
             </repeat>
             <output name="outfile" file="sort_result3.bed"/>
         </test>
+        <test expect_num_outputs="1">
+            <param name="infile" value="sort3.tabular"/>
+            <param name="header" value="0"/>
+            <param name="unique" value="false"/>
+            <repeat name="sortkeys">
+                <param name="column" value="2"/>
+                <param name="start_charpos" value="7"/>
+                <param name="order" value=""/>
+                <param name="style" value="n"/>
+            </repeat>
+            <repeat name="sortkeys">
+                <param name="column" value="2"/>
+                <param name="start_charpos" value="4"/>
+                <param name="end_charpos" value="5"/>
+                <param name="order" value=""/>
+                <param name="style" value="n"/>
+            </repeat>
+            <repeat name="sortkeys">
+                <param name="column" value="2"/>
+                <param name="start_charpos" value="1"/>
+                <param name="end_charpos" value="2"/>
+                <param name="order" value="r"/>
+                <param name="style" value="n"/>
+            </repeat>
+            <output name="outfile" file="sorted3.tabular" ftype="tabular" />
+        </test>
+        <!-- Test ignore_case param -->
+        <test expect_num_outputs="1">
+            <param name="infile" value="sort4.tabular"/>
+            <param name="header" value="1"/>
+            <param name="unique" value="false"/>
+            <repeat name="sortkeys">
+                <param name="column" value="1"/>
+                <param name="order" value=""/>
+                <param name="style" value=""/>
+                <param name="ignore_case" value="true"/>
+            </repeat>
+            <repeat name="sortkeys">
+                <param name="column" value="3"/>
+                <param name="order" value="r"/>
+                <param name="style" value="n"/>
+            </repeat>
+            <output name="outfile" file="sorted4_partial.tabular" ftype="tabular" />
+        </test>
+        <!-- Test ignore_leading_blanks param -->
+        <test expect_num_outputs="1">
+            <param name="infile" value="sort4.tabular"/>
+            <param name="header" value="1"/>
+            <param name="unique" value="false"/>
+            <repeat name="sortkeys">
+                <param name="column" value="1"/>
+                <param name="start_charpos" value="1"/>
+                <param name="end_charpos" value="4"/>
+                <param name="order" value=""/>
+                <param name="style" value=""/>
+                <param name="ignore_case" value="true"/>
+                <param name="ignore_leading_blanks" value="true"/>
+            </repeat>
+            <repeat name="sortkeys">
+                <param name="column" value="1"/>
+                <param name="start_charpos" value="5"/>
+                <param name="order" value=""/>
+                <param name="style" value=""/>
+                <param name="ignore_case" value="true"/>
+                <param name="ignore_leading_blanks" value="true"/>
+            </repeat>
+            <repeat name="sortkeys">
+                <param name="column" value="3"/>
+                <param name="order" value="r"/>
+                <param name="style" value="n"/>
+                <param name="ignore_leading_blanks" value="true"/>
+            </repeat>
+            <output name="outfile" file="sorted4.tabular" ftype="tabular" />
+        </test>
     </tests>
     <help>
 <![CDATA[
@@ -171,6 +257,39 @@
 
 If you're planning to use the file with another tool that expected sorted files (such as *join*), you should use the **Alphabetical sort**,  not the **Natural Sort**. Natural sort order is easier for humans, but is unnatural for computer programs.
 
+-----
+
+**Example - Sorting based on parts of column values**
+
+The above column of chromosomes, with their constant prefix, could have been sorted in natural order also with the **Fast numeric sort** and **considering its characters from** character 4 only.
+
+In general, sorting based on just a range of characters in a column can be useful for sorting values with internal structure, in a single tool run.
+
+Consider, for example, the following column of dates, which is unfortunately not ISO-8601 formatted::
+
+    10/24/2025
+    11/18/1998
+    09/18/1974
+    12/16/1998
+    03/04/2007
+    11/17/1998
+
+You could modify these values with other tools first, but you can achieve correct chronological sort order with a single run of the sort tool like this:
+
+- Do a **Fast numeric sort** on the column **considering its characters from** character 7 (the start of the year).
+- Resolve ties (using another column selection section) with another **Fast numeric sort** on the same column **considering its characters from** character 1 **to and including** character 2 (the month representation).
+- Resolve remaining ties with a third **Fast numeric sort**, again on the same column, **considering its characters from** character 4 **to and including** character 5 (the day representation).
+
+This will result in the ascending chronological order::
+
+    09/18/1974
+    11/17/1998
+    11/18/1998
+    12/16/1998
+    03/04/2007
+    10/24/2025
+
+Before relying on in-column character ranges, make extra sure that all values are formatted consistently (in the above example, that all dates use two digits for days and months and the same overall date format).
 ]]>
   </help>
   <expand macro="citations" />
--- a/sorted_uniq.xml	Wed Jun 04 15:11:51 2025 +0000
+++ b/sorted_uniq.xml	Sat Jan 17 00:56:56 2026 +0000
@@ -5,7 +5,7 @@
     </macros>
     <expand macro="creator"/>
     <expand macro="requirements">
-        <requirement type="package" version="4.8">sed</requirement>
+        <requirement type="package" version="4.9">sed</requirement>
     </expand>
     <version_command>uniq --version | head -n 1</version_command>
     <command>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/1_dup.bed	Sat Jan 17 00:56:56 2026 +0000
@@ -0,0 +1,71 @@
+chr6	108594662	108594687	CCDS5063.1_cds_0_0_chr6_108594663_f	0	+
+chr6	108640045	108640151	CCDS5064.1_cds_0_0_chr6_108640046_r	0	-
+chr6	108722976	108723115	CCDS5067.1_cds_0_0_chr6_108722977_f	0	+
+chr7	113660517	113660685	CCDS5760.1_cds_0_0_chr7_113660518_f	0	+
+chr1	147962192	147962580	CCDS989.1_cds_0_0_chr1_147962193_r	0	-
+chr1	147984545	147984630	CCDS990.1_cds_0_0_chr1_147984546_f	0	+
+chr1	148078400	148078582	CCDS993.1_cds_0_0_chr1_148078401_r	0	-
+chr1	148078400	148078582	CCDS993.1_cds_0_0_chr1_148078401_r	0	-
+chr1	148078400	148078582	CCDS993.1_cds_0_0_chr1_148078401_r	0	-
+chr1	148185136	148185276	CCDS996.1_cds_0_0_chr1_148185137_f	0	+
+chr10	55251623	55253124	CCDS7248.1_cds_0_0_chr10_55251624_r	0	-
+chr10	55251623	55253124	CCDS7248.1_cds_0_0_chr10_55251624_r	0	-
+chr11	116124407	116124501	CCDS8374.1_cds_0_0_chr11_116124408_r	0	-
+chr11	116206508	116206563	CCDS8377.1_cds_0_0_chr11_116206509_f	0	+
+chr11	116211733	116212337	CCDS8378.1_cds_0_0_chr11_116211734_r	0	-
+chr11	1812377	1812407	CCDS7726.1_cds_0_0_chr11_1812378_f	0	+
+chr12	38440094	38440321	CCDS8736.1_cds_0_0_chr12_38440095_r	0	-
+chr12	38440094	38440321	CCDS8736.1_cds_0_0_chr12_38440095_r	0	-
+chr13	112381694	112381953	CCDS9526.1_cds_0_0_chr13_112381695_f	0	+
+chr14	98710240	98712285	CCDS9949.1_cds_0_0_chr14_98710241_r	0	-
+chr15	41486872	41487060	CCDS10096.1_cds_0_0_chr15_41486873_r	0	-
+chr12	38440094	38440321	CCDS8736.1_cds_0_0_chr12_38440095_r	0	-
+chr15	41673708	41673857	CCDS10097.1_cds_0_0_chr15_41673709_f	0	+
+chr15	41679161	41679250	CCDS10098.1_cds_0_0_chr15_41679162_r	0	-
+chr15	41826029	41826196	CCDS10101.1_cds_0_0_chr15_41826030_f	0	+
+chr16	142908	143003	CCDS10397.1_cds_0_0_chr16_142909_f	0	+
+chr16	179963	180135	CCDS10401.1_cds_0_0_chr16_179964_r	0	-
+chr16	244413	244681	CCDS10402.1_cds_0_0_chr16_244414_f	0	+
+chr16	259268	259383	CCDS10403.1_cds_0_0_chr16_259269_r	0	-
+chr18	23786114	23786321	CCDS11891.1_cds_0_0_chr18_23786115_r	0	-
+chr18	59406881	59407046	CCDS11985.1_cds_0_0_chr18_59406882_f	0	+
+chr12	38440094	38440321	CCDS8736.1_cds_0_0_chr12_38440095_r	0	-
+chr18	59455932	59456337	CCDS11986.1_cds_0_0_chr18_59455933_r	0	-
+chr18	59600586	59600754	CCDS11988.1_cds_0_0_chr18_59600587_f	0	+
+chr19	59068595	59069564	CCDS12866.1_cds_0_0_chr19_59068596_f	0	+
+chr19	59236026	59236146	CCDS12872.1_cds_0_0_chr19_59236027_r	0	-
+chr19	59297998	59298008	CCDS12877.1_cds_0_0_chr19_59297999_f	0	+
+chr19	59302168	59302288	CCDS12878.1_cds_0_0_chr19_59302169_r	0	-
+chr2	118288583	118288668	CCDS2120.1_cds_0_0_chr2_118288584_f	0	+
+chr2	118394148	118394202	CCDS2121.1_cds_0_0_chr2_118394149_r	0	-
+chr2	220190202	220190242	CCDS2441.1_cds_0_0_chr2_220190203_f	0	+
+chr2	220229609	220230869	CCDS2443.1_cds_0_0_chr2_220229610_r	0	-
+chr20	33330413	33330423	CCDS13249.1_cds_0_0_chr20_33330414_r	0	-
+chr20	33513606	33513792	CCDS13255.1_cds_0_0_chr20_33513607_f	0	+
+chr20	33579500	33579527	CCDS13256.1_cds_0_0_chr20_33579501_r	0	-
+chr20	33593260	33593348	CCDS13257.1_cds_0_0_chr20_33593261_f	0	+
+chr21	32707032	32707192	CCDS13614.1_cds_0_0_chr21_32707033_f	0	+
+chr21	32869641	32870022	CCDS13615.1_cds_0_0_chr21_32869642_r	0	-
+chr21	33321040	33322012	CCDS13620.1_cds_0_0_chr21_33321041_f	0	+
+chr21	33744994	33745040	CCDS13625.1_cds_0_0_chr21_33744995_r	0	-
+chr22	30120223	30120265	CCDS13897.1_cds_0_0_chr22_30120224_f	0	+
+chr22	30160419	30160661	CCDS13898.1_cds_0_0_chr22_30160420_r	0	-
+chr22	30665273	30665360	CCDS13901.1_cds_0_0_chr22_30665274_f	0	+
+chr22	30939054	30939266	CCDS13903.1_cds_0_0_chr22_30939055_r	0	-
+chr5	131424298	131424460	CCDS4149.1_cds_0_0_chr5_131424299_f	0	+
+chr5	131556601	131556672	CCDS4151.1_cds_0_0_chr5_131556602_r	0	-
+chr5	131621326	131621419	CCDS4152.1_cds_0_0_chr5_131621327_f	0	+
+chr5	131847541	131847666	CCDS4155.1_cds_0_0_chr5_131847542_r	0	-
+chr6	108299600	108299744	CCDS5061.1_cds_0_0_chr6_108299601_r	0	-
+chr7	116512159	116512389	CCDS5771.1_cds_0_0_chr7_116512160_r	0	-
+chr7	116714099	116714152	CCDS5773.1_cds_0_0_chr7_116714100_f	0	+
+chr7	116945541	116945787	CCDS5774.1_cds_0_0_chr7_116945542_r	0	-
+chr8	118881131	118881317	CCDS6324.1_cds_0_0_chr8_118881132_r	0	-
+chr9	128764156	128764189	CCDS6914.1_cds_0_0_chr9_128764157_f	0	+
+chr9	128787519	128789136	CCDS6915.1_cds_0_0_chr9_128787520_r	0	-
+chr9	128882427	128882523	CCDS6917.1_cds_0_0_chr9_128882428_f	0	+
+chr9	128937229	128937445	CCDS6919.1_cds_0_0_chr9_128937230_r	0	-
+chrX	122745047	122745924	CCDS14606.1_cds_0_0_chrX_122745048_f	0	+
+chrX	152648964	152649196	CCDS14733.1_cds_0_0_chrX_152648965_r	0	-
+chrX	152691446	152691471	CCDS14735.1_cds_0_0_chrX_152691447_f	0	+
+chrX	152694029	152694263	CCDS14736.1_cds_0_0_chrX_152694030_r	0	-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sort3.tabular	Sat Jan 17 00:56:56 2026 +0000
@@ -0,0 +1,4 @@
+Alice	22.10.2025
+Bob	28.01.2024
+Charlie	13.06.2025
+Alex	25.06.2025
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sort4.tabular	Sat Jan 17 00:56:56 2026 +0000
@@ -0,0 +1,5 @@
+Motif	Length	Obs	Code
+AGCTAAGG	8	10	A
+  GCTTAAGGC	9	6	A
+ CCCGTAG	7	13	A
+agcTAAgg	8	40	a
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sorted3.tabular	Sat Jan 17 00:56:56 2026 +0000
@@ -0,0 +1,4 @@
+Bob	28.01.2024
+Alex	25.06.2025
+Charlie	13.06.2025
+Alice	22.10.2025
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sorted4.tabular	Sat Jan 17 00:56:56 2026 +0000
@@ -0,0 +1,5 @@
+Motif	Length	Obs	Code
+agcTAAgg	8	40	a
+AGCTAAGG	8	10	A
+ CCCGTAG	7	13	A
+  GCTTAAGGC	9	6	A
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sorted4_partial.tabular	Sat Jan 17 00:56:56 2026 +0000
@@ -0,0 +1,5 @@
+Motif	Length	Obs	Code
+  GCTTAAGGC	9	6	A
+ CCCGTAG	7	13	A
+agcTAAgg	8	40	a
+AGCTAAGG	8	10	A
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unique_results2.bed	Sat Jan 17 00:56:56 2026 +0000
@@ -0,0 +1,65 @@
+chr1	147962192	147962580	CCDS989.1_cds_0_0_chr1_147962193_r	0	-
+chr16	142908	143003	CCDS10397.1_cds_0_0_chr16_142909_f	0	+
+chr16	179963	180135	CCDS10401.1_cds_0_0_chr16_179964_r	0	-
+chr16	244413	244681	CCDS10402.1_cds_0_0_chr16_244414_f	0	+
+chr16	259268	259383	CCDS10403.1_cds_0_0_chr16_259269_r	0	-
+chr11	1812377	1812407	CCDS7726.1_cds_0_0_chr11_1812378_f	0	+
+chr18	23786114	23786321	CCDS11891.1_cds_0_0_chr18_23786115_r	0	-
+chr22	30120223	30120265	CCDS13897.1_cds_0_0_chr22_30120224_f	0	+
+chr22	30160419	30160661	CCDS13898.1_cds_0_0_chr22_30160420_r	0	-
+chr22	30665273	30665360	CCDS13901.1_cds_0_0_chr22_30665274_f	0	+
+chr22	30939054	30939266	CCDS13903.1_cds_0_0_chr22_30939055_r	0	-
+chr21	32707032	32707192	CCDS13614.1_cds_0_0_chr21_32707033_f	0	+
+chr21	32869641	32870022	CCDS13615.1_cds_0_0_chr21_32869642_r	0	-
+chr21	33321040	33322012	CCDS13620.1_cds_0_0_chr21_33321041_f	0	+
+chr20	33330413	33330423	CCDS13249.1_cds_0_0_chr20_33330414_r	0	-
+chr20	33513606	33513792	CCDS13255.1_cds_0_0_chr20_33513607_f	0	+
+chr20	33579500	33579527	CCDS13256.1_cds_0_0_chr20_33579501_r	0	-
+chr20	33593260	33593348	CCDS13257.1_cds_0_0_chr20_33593261_f	0	+
+chr21	33744994	33745040	CCDS13625.1_cds_0_0_chr21_33744995_r	0	-
+chr12	38440094	38440321	CCDS8736.1_cds_0_0_chr12_38440095_r	0	-
+chr15	41486872	41487060	CCDS10096.1_cds_0_0_chr15_41486873_r	0	-
+chr15	41673708	41673857	CCDS10097.1_cds_0_0_chr15_41673709_f	0	+
+chr15	41679161	41679250	CCDS10098.1_cds_0_0_chr15_41679162_r	0	-
+chr15	41826029	41826196	CCDS10101.1_cds_0_0_chr15_41826030_f	0	+
+chr10	55251623	55253124	CCDS7248.1_cds_0_0_chr10_55251624_r	0	-
+chr19	59068595	59069564	CCDS12866.1_cds_0_0_chr19_59068596_f	0	+
+chr19	59236026	59236146	CCDS12872.1_cds_0_0_chr19_59236027_r	0	-
+chr19	59297998	59298008	CCDS12877.1_cds_0_0_chr19_59297999_f	0	+
+chr19	59302168	59302288	CCDS12878.1_cds_0_0_chr19_59302169_r	0	-
+chr18	59406881	59407046	CCDS11985.1_cds_0_0_chr18_59406882_f	0	+
+chr18	59455932	59456337	CCDS11986.1_cds_0_0_chr18_59455933_r	0	-
+chr18	59600586	59600754	CCDS11988.1_cds_0_0_chr18_59600587_f	0	+
+chr14	98710240	98712285	CCDS9949.1_cds_0_0_chr14_98710241_r	0	-
+chr6	108299600	108299744	CCDS5061.1_cds_0_0_chr6_108299601_r	0	-
+chr6	108594662	108594687	CCDS5063.1_cds_0_0_chr6_108594663_f	0	+
+chr6	108640045	108640151	CCDS5064.1_cds_0_0_chr6_108640046_r	0	-
+chr6	108722976	108723115	CCDS5067.1_cds_0_0_chr6_108722977_f	0	+
+chr13	112381694	112381953	CCDS9526.1_cds_0_0_chr13_112381695_f	0	+
+chr7	113660517	113660685	CCDS5760.1_cds_0_0_chr7_113660518_f	0	+
+chr11	116124407	116124501	CCDS8374.1_cds_0_0_chr11_116124408_r	0	-
+chr11	116206508	116206563	CCDS8377.1_cds_0_0_chr11_116206509_f	0	+
+chr11	116211733	116212337	CCDS8378.1_cds_0_0_chr11_116211734_r	0	-
+chr7	116512159	116512389	CCDS5771.1_cds_0_0_chr7_116512160_r	0	-
+chr7	116714099	116714152	CCDS5773.1_cds_0_0_chr7_116714100_f	0	+
+chr7	116945541	116945787	CCDS5774.1_cds_0_0_chr7_116945542_r	0	-
+chr2	118288583	118288668	CCDS2120.1_cds_0_0_chr2_118288584_f	0	+
+chr2	118394148	118394202	CCDS2121.1_cds_0_0_chr2_118394149_r	0	-
+chr8	118881131	118881317	CCDS6324.1_cds_0_0_chr8_118881132_r	0	-
+chrX	122745047	122745924	CCDS14606.1_cds_0_0_chrX_122745048_f	0	+
+chr9	128764156	128764189	CCDS6914.1_cds_0_0_chr9_128764157_f	0	+
+chr9	128787519	128789136	CCDS6915.1_cds_0_0_chr9_128787520_r	0	-
+chr9	128882427	128882523	CCDS6917.1_cds_0_0_chr9_128882428_f	0	+
+chr9	128937229	128937445	CCDS6919.1_cds_0_0_chr9_128937230_r	0	-
+chr5	131424298	131424460	CCDS4149.1_cds_0_0_chr5_131424299_f	0	+
+chr5	131556601	131556672	CCDS4151.1_cds_0_0_chr5_131556602_r	0	-
+chr5	131621326	131621419	CCDS4152.1_cds_0_0_chr5_131621327_f	0	+
+chr5	131847541	131847666	CCDS4155.1_cds_0_0_chr5_131847542_r	0	-
+chr1	147984545	147984630	CCDS990.1_cds_0_0_chr1_147984546_f	0	+
+chr1	148078400	148078582	CCDS993.1_cds_0_0_chr1_148078401_r	0	-
+chr1	148185136	148185276	CCDS996.1_cds_0_0_chr1_148185137_f	0	+
+chrX	152648964	152649196	CCDS14733.1_cds_0_0_chrX_152648965_r	0	-
+chrX	152691446	152691471	CCDS14735.1_cds_0_0_chrX_152691447_f	0	+
+chrX	152694029	152694263	CCDS14736.1_cds_0_0_chrX_152694030_r	0	-
+chr2	220190202	220190242	CCDS2441.1_cds_0_0_chr2_220190203_f	0	+
+chr2	220229609	220230869	CCDS2443.1_cds_0_0_chr2_220229610_r	0	-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/unique_results3.bed	Sat Jan 17 00:56:56 2026 +0000
@@ -0,0 +1,65 @@
+chr1	147962192	147962580	CCDS989.1_cds_0_0_chr1_147962193_r	0	-
+chr1	147984545	147984630	CCDS990.1_cds_0_0_chr1_147984546_f	0	+
+chr1	148078400	148078582	CCDS993.1_cds_0_0_chr1_148078401_r	0	-
+chr1	148185136	148185276	CCDS996.1_cds_0_0_chr1_148185137_f	0	+
+chr10	55251623	55253124	CCDS7248.1_cds_0_0_chr10_55251624_r	0	-
+chr11	116124407	116124501	CCDS8374.1_cds_0_0_chr11_116124408_r	0	-
+chr11	116206508	116206563	CCDS8377.1_cds_0_0_chr11_116206509_f	0	+
+chr11	116211733	116212337	CCDS8378.1_cds_0_0_chr11_116211734_r	0	-
+chr11	1812377	1812407	CCDS7726.1_cds_0_0_chr11_1812378_f	0	+
+chr12	38440094	38440321	CCDS8736.1_cds_0_0_chr12_38440095_r	0	-
+chr13	112381694	112381953	CCDS9526.1_cds_0_0_chr13_112381695_f	0	+
+chr14	98710240	98712285	CCDS9949.1_cds_0_0_chr14_98710241_r	0	-
+chr15	41486872	41487060	CCDS10096.1_cds_0_0_chr15_41486873_r	0	-
+chr15	41673708	41673857	CCDS10097.1_cds_0_0_chr15_41673709_f	0	+
+chr15	41679161	41679250	CCDS10098.1_cds_0_0_chr15_41679162_r	0	-
+chr15	41826029	41826196	CCDS10101.1_cds_0_0_chr15_41826030_f	0	+
+chr16	142908	143003	CCDS10397.1_cds_0_0_chr16_142909_f	0	+
+chr16	179963	180135	CCDS10401.1_cds_0_0_chr16_179964_r	0	-
+chr16	244413	244681	CCDS10402.1_cds_0_0_chr16_244414_f	0	+
+chr16	259268	259383	CCDS10403.1_cds_0_0_chr16_259269_r	0	-
+chr18	23786114	23786321	CCDS11891.1_cds_0_0_chr18_23786115_r	0	-
+chr18	59406881	59407046	CCDS11985.1_cds_0_0_chr18_59406882_f	0	+
+chr18	59455932	59456337	CCDS11986.1_cds_0_0_chr18_59455933_r	0	-
+chr18	59600586	59600754	CCDS11988.1_cds_0_0_chr18_59600587_f	0	+
+chr19	59068595	59069564	CCDS12866.1_cds_0_0_chr19_59068596_f	0	+
+chr19	59236026	59236146	CCDS12872.1_cds_0_0_chr19_59236027_r	0	-
+chr19	59297998	59298008	CCDS12877.1_cds_0_0_chr19_59297999_f	0	+
+chr19	59302168	59302288	CCDS12878.1_cds_0_0_chr19_59302169_r	0	-
+chr2	118288583	118288668	CCDS2120.1_cds_0_0_chr2_118288584_f	0	+
+chr2	118394148	118394202	CCDS2121.1_cds_0_0_chr2_118394149_r	0	-
+chr2	220190202	220190242	CCDS2441.1_cds_0_0_chr2_220190203_f	0	+
+chr2	220229609	220230869	CCDS2443.1_cds_0_0_chr2_220229610_r	0	-
+chr20	33330413	33330423	CCDS13249.1_cds_0_0_chr20_33330414_r	0	-
+chr20	33513606	33513792	CCDS13255.1_cds_0_0_chr20_33513607_f	0	+
+chr20	33579500	33579527	CCDS13256.1_cds_0_0_chr20_33579501_r	0	-
+chr20	33593260	33593348	CCDS13257.1_cds_0_0_chr20_33593261_f	0	+
+chr21	32707032	32707192	CCDS13614.1_cds_0_0_chr21_32707033_f	0	+
+chr21	32869641	32870022	CCDS13615.1_cds_0_0_chr21_32869642_r	0	-
+chr21	33321040	33322012	CCDS13620.1_cds_0_0_chr21_33321041_f	0	+
+chr21	33744994	33745040	CCDS13625.1_cds_0_0_chr21_33744995_r	0	-
+chr22	30120223	30120265	CCDS13897.1_cds_0_0_chr22_30120224_f	0	+
+chr22	30160419	30160661	CCDS13898.1_cds_0_0_chr22_30160420_r	0	-
+chr22	30665273	30665360	CCDS13901.1_cds_0_0_chr22_30665274_f	0	+
+chr22	30939054	30939266	CCDS13903.1_cds_0_0_chr22_30939055_r	0	-
+chr5	131424298	131424460	CCDS4149.1_cds_0_0_chr5_131424299_f	0	+
+chr5	131556601	131556672	CCDS4151.1_cds_0_0_chr5_131556602_r	0	-
+chr5	131621326	131621419	CCDS4152.1_cds_0_0_chr5_131621327_f	0	+
+chr5	131847541	131847666	CCDS4155.1_cds_0_0_chr5_131847542_r	0	-
+chr6	108299600	108299744	CCDS5061.1_cds_0_0_chr6_108299601_r	0	-
+chr6	108594662	108594687	CCDS5063.1_cds_0_0_chr6_108594663_f	0	+
+chr6	108640045	108640151	CCDS5064.1_cds_0_0_chr6_108640046_r	0	-
+chr6	108722976	108723115	CCDS5067.1_cds_0_0_chr6_108722977_f	0	+
+chr7	113660517	113660685	CCDS5760.1_cds_0_0_chr7_113660518_f	0	+
+chr7	116512159	116512389	CCDS5771.1_cds_0_0_chr7_116512160_r	0	-
+chr7	116714099	116714152	CCDS5773.1_cds_0_0_chr7_116714100_f	0	+
+chr7	116945541	116945787	CCDS5774.1_cds_0_0_chr7_116945542_r	0	-
+chr8	118881131	118881317	CCDS6324.1_cds_0_0_chr8_118881132_r	0	-
+chr9	128764156	128764189	CCDS6914.1_cds_0_0_chr9_128764157_f	0	+
+chr9	128787519	128789136	CCDS6915.1_cds_0_0_chr9_128787520_r	0	-
+chr9	128882427	128882523	CCDS6917.1_cds_0_0_chr9_128882428_f	0	+
+chr9	128937229	128937445	CCDS6919.1_cds_0_0_chr9_128937230_r	0	-
+chrX	122745047	122745924	CCDS14606.1_cds_0_0_chrX_122745048_f	0	+
+chrX	152648964	152649196	CCDS14733.1_cds_0_0_chrX_152648965_r	0	-
+chrX	152691446	152691471	CCDS14735.1_cds_0_0_chrX_152691447_f	0	+
+chrX	152694029	152694263	CCDS14736.1_cds_0_0_chrX_152694030_r	0	-
--- a/unsorted_uniq.xml	Wed Jun 04 15:11:51 2025 +0000
+++ b/unsorted_uniq.xml	Sat Jan 17 00:56:56 2026 +0000
@@ -4,30 +4,39 @@
         <import>macros.xml</import>
     </macros>
     <expand macro="creator"/>
-    <expand macro="requirements" />
+    <expand macro="requirements">
+        <requirement type="package" version="4.9">sed</requirement>
+    </expand>
     <version_command>sort --version | head -n 1</version_command>
     <command>
 <![CDATA[
-    sort -u
-        $ignore_case
-        $is_numeric
-        -t '	'
-        #if $adv_opts.adv_opts_selector == "advanced":
-            -k$adv_opts.column_start,$adv_opts.column_end
+    (
+        export LC_ALL=C;
+        #if int($header) > 0:
+            sed -u '${header}'q &&
         #end if
-        -o '$outfile'
-        '$infile'
+        sort -u
+            $ignore_case
+            $is_numeric
+            -t '	'
+            #if $adv_opts.adv_opts_selector == "advanced":
+                -k$adv_opts.column_start,$adv_opts.column_end
+            #end if
+    ) < '$infile' > '$outfile'
 ]]>
     </command>
     <inputs>
         <param name="infile" type="data" format="tabular" label="File to scan for unique values" />
-        <param name="ignore_case" type="boolean" truevalue="-f" falsevalue="" checked="False"
+        <param name="ignore_case" type="boolean" truevalue="-f" falsevalue=""
             label="Ignore differences in case when comparing" help="(-f)"/>
-        <param name="is_numeric" type="boolean" truevalue="-n" falsevalue="" checked="False"
-            label="Column only contains numeric values" help="(-n)" />
+        <param name="is_numeric" type="boolean" truevalue="-n" falsevalue=""
+            label="Compare numeric values at start of records" help="This will try to detect numeric values at the start of each record and base comparisons only on these numbers (or the empty string if no starting number is found) (-n)." />
+        <param name="header" type="integer" value="0" label="Number of header lines" help="Header lines are copied to the output unchanged and are excluded from the uniqueness comparison.">
+            <validator type="in_range" message="Negative values are not allowed." min="0"/>
+        </param>
         <conditional name="adv_opts">
             <param name="adv_opts_selector" type="select" label="Advanced Options">
-                <option value="basic" selected="True">Hide Advanced Options</option>
+                <option value="basic" selected="true">Hide Advanced Options</option>
                 <option value="advanced">Show Advanced Options</option>
             </param>
             <when value="basic" />
@@ -43,8 +52,8 @@
     <tests>
         <test>
             <param name="infile" value="1.bed"/>
-            <param name="is_numeric" value="True"/>
-            <param name="ignore_case" value="True"/>
+            <param name="is_numeric" value="true"/>
+            <param name="ignore_case" value="true"/>
             <conditional name="adv_opts">
                 <param name="adv_opts_selector" value="advanced"/>
                 <param name="column_start" value="2"/>
@@ -52,6 +61,24 @@
             </conditional>
             <output name="outfile" file="unique_results1.bed"/>
         </test>
+        <test>
+            <param name="infile" value="1.bed"/>
+            <param name="is_numeric" value="true"/>
+            <param name="ignore_case" value="true"/>
+            <param name="header" value="1"/>
+            <conditional name="adv_opts">
+                <param name="adv_opts_selector" value="advanced"/>
+                <param name="column_start" value="2"/>
+                <param name="column_end" value="3"/>
+            </conditional>
+            <output name="outfile" file="unique_results2.bed"/>
+        </test>
+        <test>
+            <param name="infile" value="1_dup.bed"/>
+            <param name="is_numeric" value="false"/>
+            <param name="ignore_case" value="true"/>
+            <output name="outfile" file="unique_results3.bed"/>
+        </test>
     </tests>
     <help>
 <![CDATA[