Mercurial > repos > bgruening > text_processing
changeset 30:5907d248dee3 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 28d2fcf2649b999762fbd94bd648485b916f2f0d
| author | bgruening |
|---|---|
| date | Sat, 17 Jan 2026 00:56:56 +0000 |
| parents | 4f7cade041cb |
| children | |
| files | grep.xml macros.xml replace_text_in_line.xml sed.xml sort.xml sorted_uniq.xml test-data/1_dup.bed test-data/sort3.tabular test-data/sort4.tabular test-data/sorted3.tabular test-data/sorted4.tabular test-data/sorted4_partial.tabular test-data/unique_results2.bed test-data/unique_results3.bed unsorted_uniq.xml |
| diffstat | 15 files changed, 404 insertions(+), 36 deletions(-) [+] |
line wrap: on
line diff
--- a/grep.xml Wed Jun 04 15:11:51 2025 +0000 +++ b/grep.xml Sat Jan 17 00:56:56 2026 +0000 @@ -6,7 +6,7 @@ <expand macro="creator"/> <requirements> <requirement type="package" version="3.11">grep</requirement> - <requirement type="package" version="4.8">sed</requirement><!-- for ansi2html.sh --> + <requirement type="package" version="4.9">sed</requirement><!-- for ansi2html.sh --> </requirements> <stdio> <exit_code range="2:" level="fatal" description="grep failed" />
--- a/macros.xml Wed Jun 04 15:11:51 2025 +0000 +++ b/macros.xml Sat Jan 17 00:56:56 2026 +0000 @@ -6,7 +6,7 @@ </requirements> </xml> <token name="@TOOL_VERSION@">9.5</token> - <token name="@VERSION_SUFFIX@">2</token> + <token name="@VERSION_SUFFIX@">3</token> <token name="@PROFILE@">23.1</token> <xml name="stdio"> <stdio>
--- a/replace_text_in_line.xml Wed Jun 04 15:11:51 2025 +0000 +++ b/replace_text_in_line.xml Sat Jan 17 00:56:56 2026 +0000 @@ -5,7 +5,7 @@ </macros> <expand macro="creator"/> <requirements> - <requirement type="package" version="4.8">sed</requirement> + <requirement type="package" version="4.9">sed</requirement> </requirements> <version_command>sed --version | head -n 1</version_command> <command> @@ -26,7 +26,7 @@ <inputs> <param format="txt" name="infile" type="data" label="File to process" /> <repeat name="replacements" title="Replacement" min="1"> - <param name="find_pattern" type="text" size="20" label="Find pattern" help="Use simple text, or a valid regular expression (without backslashes // ) " > + <param name="find_pattern" type="text" label="Find pattern" help="Use simple text, or a valid regular expression"> <sanitizer> <valid initial="string.printable"> <remove value="'"/> @@ -38,7 +38,7 @@ </mapping> </sanitizer> </param> - <param name="replace_pattern" type="text" size="20" label="Replace with:" help="Use simple text, or & (ampersand) and \\1 \\2 \\3 to refer to matched text. See examples below." > + <param name="replace_pattern" type="text" label="Replace with:" help="Use simple text, or & (ampersand) and \1, \2, \3, etc. to refer to matched text. See examples below." > <sanitizer> <valid initial="string.printable"> <remove value="'"/> @@ -50,7 +50,7 @@ </mapping> </sanitizer> </param> - <param name="sed_options" type="text" size="20" optional="true" label="Additional sed commands before replacement" help="Provide additional sed commands before the replacement (e.g., ':a;N;$!ba;')." > + <param name="sed_options" type="text" optional="true" label="Additional sed commands before replacement" help="Provide additional sed commands before the replacement (e.g., ':a;N;$!ba;')." > <sanitizer> <valid initial="string.printable"> <remove value="'"/> <!-- Removes single quotes -->
--- a/sed.xml Wed Jun 04 15:11:51 2025 +0000 +++ b/sed.xml Sat Jan 17 00:56:56 2026 +0000 @@ -5,7 +5,7 @@ </macros> <expand macro="creator"/> <requirements> - <requirement type="package" version="4.8">sed</requirement> + <requirement type="package" version="4.9">sed</requirement> </requirements> <version_command>sed --version | head -n 1</version_command> <command> @@ -88,7 +88,7 @@ - Short sed tutorial (http://www.linuxhowtos.org/System/sed_tutorial.htm) - Long sed tutorial (http://www.grymoire.com/Unix/Sed.html) -- sed faq with good examples (http://sed.sourceforge.net/sedfaq.html) +- sed faq with good examples (https://www.pement.org/sed/sedfaq.html) - sed cheat-sheet (http://www.catonmat.net/download/sed.stream.editor.cheat.sheet.pdf) -----
--- a/sort.xml Wed Jun 04 15:11:51 2025 +0000 +++ b/sort.xml Sat Jan 17 00:56:56 2026 +0000 @@ -5,7 +5,7 @@ </macros> <expand macro="creator"/> <expand macro="requirements"> - <requirement type="package" version="4.8">sed</requirement> + <requirement type="package" version="4.9">sed</requirement> </expand> <version_command>sort --version | head -n 1</version_command> <command> @@ -16,10 +16,18 @@ sed -u '${header}'q && #end if - sort $unique $ignore_case --stable -t ' ' + sort $unique --stable -t ' ' #for $key in $sortkeys: - -k '${key.column}${key.order}${key.style},${key.column}' + #if $key.start_charpos and $key.end_charpos: + -k ${key.column}.${key.start_charpos}${key.ignore_leading_blanks},${key.column}.${key.end_charpos}${key.ignore_leading_blanks}${key.order}${key.style}${key.ignore_case} + #elif $key.start_charpos: + -k ${key.column}.${key.start_charpos}${key.ignore_leading_blanks},${key.column}${key.order}${key.style}${key.ignore_case} + #elif $key.end_charpos: + -k ${key.column}${key.ignore_leading_blanks},${key.column}.${key.end_charpos}${key.ignore_leading_blanks}${key.order}${key.style}${key.ignore_case} + #else: + -k ${key.column}${key.ignore_leading_blanks},${key.column}${key.order}${key.style}${key.ignore_case} + #end if #end for ) < '${infile}' > '${outfile}' @@ -28,17 +36,19 @@ <inputs> <param format="tabular" name="infile" type="data" label="Sort Query" /> <param name="header" type="integer" value="0" - label="Number of header lines" help="These will be ignored during sort."> + label="Number of header lines" help="Header lines will be copied to the output unchanged without operating on them."> <validator type="in_range" message="Negative values are not allowed." 
min="0"/> </param> - + <repeat name="sortkeys" title="Column selections" min="1"> - <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" /> + <param name="column" label="Sort on column" type="data_column" data_ref="infile" accept_default="true" /> + <param name="start_charpos" label="considering its characters from" type="integer" min="1" optional="true" help="Leave empty (or set to 1) to use the column value starting from its first character." /> + <param name="end_charpos" label="to and including" type="integer" min="1" optional="true" help="Leave empty to use the column value up to and including its last character." /> <param name="order" type="select" display="radio" label="in"> <option value="">Ascending order</option> <option value="r">Descending order</option> </param> - <param name="style" type="select" display="radio" label="Flavor"> + <param name="style" type="select" display="radio" label="using sort flavor"> <option value="n">Fast numeric sort (-n)</option> <option value="g">General numeric sort ( scientific notation -g)</option> <option value="V">Natural/Version sort (-V) </option> @@ -46,18 +56,20 @@ <option value="h">Human-readable numbers (-h)</option> <option value="R">Random order (-R)</option> </param> + <param name="ignore_case" type="boolean" checked="false" truevalue="f" falsevalue="" + label="ignoring case" help="Turn lowercase symbols to upper case before comparing values in this column. (-f)" /> + <param name="ignore_leading_blanks" type="boolean" checked="false" truevalue="b" falsevalue="" + label="ignoring leading blanks" help="This option can be useful with Alphabetical and Natural sort (which treat spaces as actual characters) or to prevent unwanted offsets if you specified a range of character positions to consider." 
/> </repeat> <param name="unique" type="boolean" checked="false" truevalue="--unique" falsevalue="" label="Output unique values" help="Print only unique values, based on sorted key columns. See help section for details. (--unique)" /> - <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue="" - label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters. (-i)" /> </inputs> <outputs> <data name="outfile" format_source="infile" metadata_source="infile"/> </outputs> <tests> - <test> + <test expect_num_outputs="1"> <param name="infile" value="sort1.bed"/> <param name="header" value="3"/> <repeat name="sortkeys"> @@ -72,7 +84,7 @@ </repeat> <output name="outfile" file="sort_result1.bed"/> </test> - <test> + <test expect_num_outputs="1"> <param name="infile" value="sort1.bed"/> <param name="header" value="3"/> <repeat name="sortkeys"> @@ -87,7 +99,7 @@ </repeat> <output name="outfile" file="sort_result2.bed"/> </test> - <test> + <test expect_num_outputs="1"> <param name="infile" value="sort2.bed"/> <repeat name="sortkeys"> <param name="column" value="5"/> @@ -96,6 +108,80 @@ </repeat> <output name="outfile" file="sort_result3.bed"/> </test> + <test expect_num_outputs="1"> + <param name="infile" value="sort3.tabular"/> + <param name="header" value="0"/> + <param name="unique" value="false"/> + <repeat name="sortkeys"> + <param name="column" value="2"/> + <param name="start_charpos" value="7"/> + <param name="order" value=""/> + <param name="style" value="n"/> + </repeat> + <repeat name="sortkeys"> + <param name="column" value="2"/> + <param name="start_charpos" value="4"/> + <param name="end_charpos" value="5"/> + <param name="order" value=""/> + <param name="style" value="n"/> + </repeat> + <repeat name="sortkeys"> + <param name="column" value="2"/> + <param name="start_charpos" value="1"/> + <param name="end_charpos" value="2"/> + <param name="order" value="r"/> + <param name="style" value="n"/> + 
</repeat> + <output name="outfile" file="sorted3.tabular" ftype="tabular" /> + </test> + <!-- Test ignore_case param --> + <test expect_num_outputs="1"> + <param name="infile" value="sort4.tabular"/> + <param name="header" value="1"/> + <param name="unique" value="false"/> + <repeat name="sortkeys"> + <param name="column" value="1"/> + <param name="order" value=""/> + <param name="style" value=""/> + <param name="ignore_case" value="true"/> + </repeat> + <repeat name="sortkeys"> + <param name="column" value="3"/> + <param name="order" value="r"/> + <param name="style" value="n"/> + </repeat> + <output name="outfile" file="sorted4_partial.tabular" ftype="tabular" /> + </test> + <!-- Test ignore_leading_blanks param --> + <test expect_num_outputs="1"> + <param name="infile" value="sort4.tabular"/> + <param name="header" value="1"/> + <param name="unique" value="false"/> + <repeat name="sortkeys"> + <param name="column" value="1"/> + <param name="start_charpos" value="1"/> + <param name="end_charpos" value="4"/> + <param name="order" value=""/> + <param name="style" value=""/> + <param name="ignore_case" value="true"/> + <param name="ignore_leading_blanks" value="true"/> + </repeat> + <repeat name="sortkeys"> + <param name="column" value="1"/> + <param name="start_charpos" value="5"/> + <param name="order" value=""/> + <param name="style" value=""/> + <param name="ignore_case" value="true"/> + <param name="ignore_leading_blanks" value="true"/> + </repeat> + <repeat name="sortkeys"> + <param name="column" value="3"/> + <param name="order" value="r"/> + <param name="style" value="n"/> + <param name="ignore_leading_blanks" value="true"/> + </repeat> + <output name="outfile" file="sorted4.tabular" ftype="tabular" /> + </test> </tests> <help> <![CDATA[ @@ -171,6 +257,37 @@ If you're planning to use the file with another tool that expected sorted files (such as *join*), you should use the **Alphabetical sort**, not the **Natural Sort**. 
Natural sort order is easier for humans, but is unnatural for computer programs. +----- + +**Example - Sorting based on parts of column values** + +The above column of chromosomes, with their constant prefix, could also have been sorted in natural order with the **Fast numeric sort** and **considering its characters from** character 4 only. + +In general, sorting based on just a range of characters in a column can be useful for sorting values with internal structure, in a single tool run. + +Consider, for example, the following column of dates, which is unfortunately not ISO-8601 formatted:: + + 10/24/2025 + 11/18/1998 + 09/18/1974 + 12/16/1998 + 03/04/2007 + 11/17/1998 + +You could modify these values with other tools first, but you can achieve correct chronological sort order with a single run of the sort tool like this: + +- Do a **Fast numeric sort** on the column **considering its characters from** character 7 (the start of the year). +- Resolve ties (using another column selection section) with another **Fast numeric sort** on the same column **considering its characters from** character 1 **to and including** character 2 (the month representation). +- Resolve remaining ties with a third **Fast numeric sort**, again on the same column, **considering its characters from** character 4 **to and including** character 5 (the day representation). + +This will result in the ascending chronological order:: + + 09/18/1974 + 11/17/1998 + 11/18/1998 + 12/16/1998 + 03/04/2007 + 10/24/2025 + +Before relying on in-column character ranges, make extra sure that all values are formatted consistently (in the above example, that all dates use two digits for days and months and the same overall date format). ]]> </help> <expand macro="citations" />
--- a/sorted_uniq.xml Wed Jun 04 15:11:51 2025 +0000 +++ b/sorted_uniq.xml Sat Jan 17 00:56:56 2026 +0000 @@ -5,7 +5,7 @@ </macros> <expand macro="creator"/> <expand macro="requirements"> - <requirement type="package" version="4.8">sed</requirement> + <requirement type="package" version="4.9">sed</requirement> </expand> <version_command>uniq --version | head -n 1</version_command> <command>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1_dup.bed Sat Jan 17 00:56:56 2026 +0000 @@ -0,0 +1,71 @@ +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 
- +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr7 116512159 
116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort3.tabular Sat Jan 17 00:56:56 2026 +0000 @@ -0,0 +1,4 @@ +Alice 22.10.2025 +Bob 28.01.2024 +Charlie 13.06.2025 +Alex 25.06.2025
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sort4.tabular Sat Jan 17 00:56:56 2026 +0000 @@ -0,0 +1,5 @@ +Motif Length Obs Code +AGCTAAGG 8 10 A + GCTTAAGGC 9 6 A + CCCGTAG 7 13 A +agcTAAgg 8 40 a
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted3.tabular Sat Jan 17 00:56:56 2026 +0000 @@ -0,0 +1,4 @@ +Bob 28.01.2024 +Alex 25.06.2025 +Charlie 13.06.2025 +Alice 22.10.2025
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted4.tabular Sat Jan 17 00:56:56 2026 +0000 @@ -0,0 +1,5 @@ +Motif Length Obs Code +agcTAAgg 8 40 a +AGCTAAGG 8 10 A + CCCGTAG 7 13 A + GCTTAAGGC 9 6 A
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sorted4_partial.tabular Sat Jan 17 00:56:56 2026 +0000 @@ -0,0 +1,5 @@ +Motif Length Obs Code + GCTTAAGGC 9 6 A + CCCGTAG 7 13 A +agcTAAgg 8 40 a +AGCTAAGG 8 10 A
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unique_results2.bed Sat Jan 17 00:56:56 2026 +0000 @@ -0,0 +1,65 @@ +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59302168 59302288 
CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 148078400 148078582 
CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/unique_results3.bed Sat Jan 17 00:56:56 2026 +0000 @@ -0,0 +1,65 @@ +chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - +chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + +chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - +chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + +chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - +chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - +chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + +chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - +chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + +chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - +chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + +chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - +chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - +chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + +chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - +chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + +chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + +chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - +chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + +chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - +chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - +chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + +chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - +chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + +chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + +chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - +chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + +chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - +chr2 118288583 118288668 
CCDS2120.1_cds_0_0_chr2_118288584_f 0 + +chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - +chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 + +chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - +chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - +chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + +chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - +chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + +chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + +chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - +chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + +chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - +chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + +chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - +chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + +chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - +chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + +chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - +chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + +chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - +chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - +chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + +chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - +chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + +chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + +chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - +chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + +chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - +chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - +chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + +chr9 128787519 128789136 
CCDS6915.1_cds_0_0_chr9_128787520_r 0 - +chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + +chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 - +chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + +chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - +chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + +chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 -
--- a/unsorted_uniq.xml Wed Jun 04 15:11:51 2025 +0000 +++ b/unsorted_uniq.xml Sat Jan 17 00:56:56 2026 +0000 @@ -4,30 +4,39 @@ <import>macros.xml</import> </macros> <expand macro="creator"/> - <expand macro="requirements" /> + <expand macro="requirements"> + <requirement type="package" version="4.9">sed</requirement> + </expand> <version_command>sort --version | head -n 1</version_command> <command> <![CDATA[ - sort -u - $ignore_case - $is_numeric - -t ' ' - #if $adv_opts.adv_opts_selector == "advanced": - -k$adv_opts.column_start,$adv_opts.column_end + ( + export LC_ALL=C; + #if int($header) > 0: + sed -u '${header}'q && #end if - -o '$outfile' - '$infile' + sort -u + $ignore_case + $is_numeric + -t ' ' + #if $adv_opts.adv_opts_selector == "advanced": + -k$adv_opts.column_start,$adv_opts.column_end + #end if + ) < '$infile' > '$outfile' ]]> </command> <inputs> <param name="infile" type="data" format="tabular" label="File to scan for unique values" /> - <param name="ignore_case" type="boolean" truevalue="-f" falsevalue="" checked="False" + <param name="ignore_case" type="boolean" truevalue="-f" falsevalue="" label="Ignore differences in case when comparing" help="(-f)"/> - <param name="is_numeric" type="boolean" truevalue="-n" falsevalue="" checked="False" - label="Column only contains numeric values" help="(-n)" /> + <param name="is_numeric" type="boolean" truevalue="-n" falsevalue="" + label="Compare numeric values at start of records" help="This will try to detect numeric values at the start of each record and base comparisons only on these numbers (or the empty string if no starting number is found) (-n)." /> + <param name="header" type="integer" value="0" label="Number of header lines" help="These will be ignored during sort."> + <validator type="in_range" message="Negative values are not allowed." 
min="0"/> + </param> <conditional name="adv_opts"> <param name="adv_opts_selector" type="select" label="Advanced Options"> - <option value="basic" selected="True">Hide Advanced Options</option> + <option value="basic" selected="true">Hide Advanced Options</option> <option value="advanced">Show Advanced Options</option> </param> <when value="basic" /> @@ -43,8 +52,8 @@ <tests> <test> <param name="infile" value="1.bed"/> - <param name="is_numeric" value="True"/> - <param name="ignore_case" value="True"/> + <param name="is_numeric" value="true"/> + <param name="ignore_case" value="true"/> <conditional name="adv_opts"> <param name="adv_opts_selector" value="advanced"/> <param name="column_start" value="2"/> @@ -52,6 +61,24 @@ </conditional> <output name="outfile" file="unique_results1.bed"/> </test> + <test> + <param name="infile" value="1.bed"/> + <param name="is_numeric" value="true"/> + <param name="ignore_case" value="true"/> + <param name="header" value="1"/> + <conditional name="adv_opts"> + <param name="adv_opts_selector" value="advanced"/> + <param name="column_start" value="2"/> + <param name="column_end" value="3"/> + </conditional> + <output name="outfile" file="unique_results2.bed"/> + </test> + <test> + <param name="infile" value="1_dup.bed"/> + <param name="is_numeric" value="false"/> + <param name="ignore_case" value="true"/> + <output name="outfile" file="unique_results3.bed"/> + </test> </tests> <help> <