comparison sort.xml @ 30:5907d248dee3 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/text_processing commit 28d2fcf2649b999762fbd94bd648485b916f2f0d
author bgruening
date Sat, 17 Jan 2026 00:56:56 +0000
parents 4f7cade041cb
children
comparison
equal deleted inserted replaced
29:4f7cade041cb 30:5907d248dee3
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="creator"/> 6 <expand macro="creator"/>
7 <expand macro="requirements"> 7 <expand macro="requirements">
8 <requirement type="package" version="4.8">sed</requirement> 8 <requirement type="package" version="4.9">sed</requirement>
9 </expand> 9 </expand>
10 <version_command>sort --version | head -n 1</version_command> 10 <version_command>sort --version | head -n 1</version_command>
11 <command> 11 <command>
12 <![CDATA[ 12 <![CDATA[
13 ( 13 (
14 export LC_ALL=C; 14 export LC_ALL=C;
15 #if int($header) > 0: 15 #if int($header) > 0:
16 sed -u '${header}'q && 16 sed -u '${header}'q &&
17 #end if 17 #end if
18 18
19 sort $unique $ignore_case --stable -t ' ' 19 sort $unique --stable -t ' '
20 20
21 #for $key in $sortkeys: 21 #for $key in $sortkeys:
22 -k '${key.column}${key.order}${key.style},${key.column}' 22 #if $key.start_charpos and $key.end_charpos:
23 -k ${key.column}.${key.start_charpos}${key.ignore_leading_blanks},${key.column}.${key.end_charpos}${key.ignore_leading_blanks}${key.order}${key.style}${key.ignore_case}
24 #elif $key.start_charpos:
25 -k ${key.column}.${key.start_charpos}${key.ignore_leading_blanks},${key.column}${key.order}${key.style}${key.ignore_case}
26 #elif $key.end_charpos:
27 -k ${key.column}${key.ignore_leading_blanks},${key.column}.${key.end_charpos}${key.ignore_leading_blanks}${key.order}${key.style}${key.ignore_case}
28 #else:
29 -k ${key.column}${key.ignore_leading_blanks},${key.column}${key.order}${key.style}${key.ignore_case}
30 #end if
23 #end for 31 #end for
24 32
25 ) < '${infile}' > '${outfile}' 33 ) < '${infile}' > '${outfile}'
26 ]]> 34 ]]>
27 </command> 35 </command>
28 <inputs> 36 <inputs>
29 <param format="tabular" name="infile" type="data" label="Sort Query" /> 37 <param format="tabular" name="infile" type="data" label="Sort Query" />
30 <param name="header" type="integer" value="0" 38 <param name="header" type="integer" value="0"
31 label="Number of header lines" help="These will be ignored during sort."> 39 label="Number of header lines" help="Header lines will be copied to the output unchanged without operating on them.">
32 <validator type="in_range" message="Negative values are not allowed." min="0"/> 40 <validator type="in_range" message="Negative values are not allowed." min="0"/>
33 </param> 41 </param>
34 42
35 <repeat name="sortkeys" title="Column selections" min="1"> 43 <repeat name="sortkeys" title="Column selections" min="1">
36 <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" /> 44 <param name="column" label="Sort on column" type="data_column" data_ref="infile" accept_default="true" />
45 <param name="start_charpos" label="considering its characters from" type="integer" min="1" optional="true" help="Leave empty (or set to 1) to use the column value starting from its first character." />
46 <param name="end_charpos" label="to and including" type="integer" min="1" optional="true" help="Leave empty to use the column value up to and including its last character." />
37 <param name="order" type="select" display="radio" label="in"> 47 <param name="order" type="select" display="radio" label="in">
38 <option value="">Ascending order</option> 48 <option value="">Ascending order</option>
39 <option value="r">Descending order</option> 49 <option value="r">Descending order</option>
40 </param> 50 </param>
41 <param name="style" type="select" display="radio" label="Flavor"> 51 <param name="style" type="select" display="radio" label="using sort flavor">
42 <option value="n">Fast numeric sort (-n)</option> 52 <option value="n">Fast numeric sort (-n)</option>
43 <option value="g">General numeric sort ( scientific notation -g)</option> 53 <option value="g">General numeric sort ( scientific notation -g)</option>
44 <option value="V">Natural/Version sort (-V) </option> 54 <option value="V">Natural/Version sort (-V) </option>
45 <option value="">Alphabetical sort</option> 55 <option value="">Alphabetical sort</option>
46 <option value="h">Human-readable numbers (-h)</option> 56 <option value="h">Human-readable numbers (-h)</option>
47 <option value="R">Random order (-R)</option> 57 <option value="R">Random order (-R)</option>
48 </param> 58 </param>
59 <param name="ignore_case" type="boolean" checked="false" truevalue="f" falsevalue=""
60 label="ignoring case" help="Turn lowercase symbols to upper case before comparing values in this column. (-f)" />
61 <param name="ignore_leading_blanks" type="boolean" checked="false" truevalue="b" falsevalue=""
62 label="ignoring leading blanks" help="This option can be useful with Alphabetical and Natural sort (which treat spaces as actual characters) or to prevent unwanted offsets if you specified a range of character positions to consider." />
49 </repeat> 63 </repeat>
50 64
51 <param name="unique" type="boolean" checked="false" truevalue="--unique" falsevalue="" 65 <param name="unique" type="boolean" checked="false" truevalue="--unique" falsevalue=""
52 label="Output unique values" help="Print only unique values, based on sorted key columns. See help section for details. (--unique)" /> 66 label="Output unique values" help="Print only unique values, based on sorted key columns. See help section for details. (--unique)" />
53 <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue=""
54 label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters. (-i)" />
55 </inputs> 67 </inputs>
56 <outputs> 68 <outputs>
57 <data name="outfile" format_source="infile" metadata_source="infile"/> 69 <data name="outfile" format_source="infile" metadata_source="infile"/>
58 </outputs> 70 </outputs>
59 <tests> 71 <tests>
60 <test> 72 <test expect_num_outputs="1">
61 <param name="infile" value="sort1.bed"/> 73 <param name="infile" value="sort1.bed"/>
62 <param name="header" value="3"/> 74 <param name="header" value="3"/>
63 <repeat name="sortkeys"> 75 <repeat name="sortkeys">
64 <param name="column" value="1"/> 76 <param name="column" value="1"/>
65 <param name="style" value=""/> 77 <param name="style" value=""/>
70 <param name="style" value="n"/> 82 <param name="style" value="n"/>
71 <param name="order" value="r"/> 83 <param name="order" value="r"/>
72 </repeat> 84 </repeat>
73 <output name="outfile" file="sort_result1.bed"/> 85 <output name="outfile" file="sort_result1.bed"/>
74 </test> 86 </test>
75 <test> 87 <test expect_num_outputs="1">
76 <param name="infile" value="sort1.bed"/> 88 <param name="infile" value="sort1.bed"/>
77 <param name="header" value="3"/> 89 <param name="header" value="3"/>
78 <repeat name="sortkeys"> 90 <repeat name="sortkeys">
79 <param name="column" value="1"/> 91 <param name="column" value="1"/>
80 <param name="style" value=""/> 92 <param name="style" value=""/>
85 <param name="style" value="n"/> 97 <param name="style" value="n"/>
86 <param name="order" value=""/> 98 <param name="order" value=""/>
87 </repeat> 99 </repeat>
88 <output name="outfile" file="sort_result2.bed"/> 100 <output name="outfile" file="sort_result2.bed"/>
89 </test> 101 </test>
90 <test> 102 <test expect_num_outputs="1">
91 <param name="infile" value="sort2.bed"/> 103 <param name="infile" value="sort2.bed"/>
92 <repeat name="sortkeys"> 104 <repeat name="sortkeys">
93 <param name="column" value="5"/> 105 <param name="column" value="5"/>
94 <param name="style" value="g"/> 106 <param name="style" value="g"/>
95 <param name="order" value=""/> 107 <param name="order" value=""/>
96 </repeat> 108 </repeat>
97 <output name="outfile" file="sort_result3.bed"/> 109 <output name="outfile" file="sort_result3.bed"/>
110 </test>
111 <test expect_num_outputs="1">
112 <param name="infile" value="sort3.tabular"/>
113 <param name="header" value="0"/>
114 <param name="unique" value="false"/>
115 <repeat name="sortkeys">
116 <param name="column" value="2"/>
117 <param name="start_charpos" value="7"/>
118 <param name="order" value=""/>
119 <param name="style" value="n"/>
120 </repeat>
121 <repeat name="sortkeys">
122 <param name="column" value="2"/>
123 <param name="start_charpos" value="4"/>
124 <param name="end_charpos" value="5"/>
125 <param name="order" value=""/>
126 <param name="style" value="n"/>
127 </repeat>
128 <repeat name="sortkeys">
129 <param name="column" value="2"/>
130 <param name="start_charpos" value="1"/>
131 <param name="end_charpos" value="2"/>
132 <param name="order" value="r"/>
133 <param name="style" value="n"/>
134 </repeat>
135 <output name="outfile" file="sorted3.tabular" ftype="tabular" />
136 </test>
137 <!-- Test ignore_case param -->
138 <test expect_num_outputs="1">
139 <param name="infile" value="sort4.tabular"/>
140 <param name="header" value="1"/>
141 <param name="unique" value="false"/>
142 <repeat name="sortkeys">
143 <param name="column" value="1"/>
144 <param name="order" value=""/>
145 <param name="style" value=""/>
146 <param name="ignore_case" value="true"/>
147 </repeat>
148 <repeat name="sortkeys">
149 <param name="column" value="3"/>
150 <param name="order" value="r"/>
151 <param name="style" value="n"/>
152 </repeat>
153 <output name="outfile" file="sorted4_partial.tabular" ftype="tabular" />
154 </test>
155 <!-- Test ignore_leading_blanks param -->
156 <test expect_num_outputs="1">
157 <param name="infile" value="sort4.tabular"/>
158 <param name="header" value="1"/>
159 <param name="unique" value="false"/>
160 <repeat name="sortkeys">
161 <param name="column" value="1"/>
162 <param name="start_charpos" value="1"/>
163 <param name="end_charpos" value="4"/>
164 <param name="order" value=""/>
165 <param name="style" value=""/>
166 <param name="ignore_case" value="true"/>
167 <param name="ignore_leading_blanks" value="true"/>
168 </repeat>
169 <repeat name="sortkeys">
170 <param name="column" value="1"/>
171 <param name="start_charpos" value="5"/>
172 <param name="order" value=""/>
173 <param name="style" value=""/>
174 <param name="ignore_case" value="true"/>
175 <param name="ignore_leading_blanks" value="true"/>
176 </repeat>
177 <repeat name="sortkeys">
178 <param name="column" value="3"/>
179 <param name="order" value="r"/>
180 <param name="style" value="n"/>
181 <param name="ignore_leading_blanks" value="true"/>
182 </repeat>
183 <output name="outfile" file="sorted4.tabular" ftype="tabular" />
98 </test> 184 </test>
99 </tests> 185 </tests>
100 <help> 186 <help>
101 <![CDATA[ 187 <![CDATA[
102 **What it does** 188 **What it does**
169 255
170 .. class:: infomark 256 .. class:: infomark
171 257
172 If you're planning to use the file with another tool that expected sorted files (such as *join*), you should use the **Alphabetical sort**, not the **Natural Sort**. Natural sort order is easier for humans, but is unnatural for computer programs. 258 If you're planning to use the file with another tool that expected sorted files (such as *join*), you should use the **Alphabetical sort**, not the **Natural Sort**. Natural sort order is easier for humans, but is unnatural for computer programs.
173 259
260 -----
261
262 **Example - Sorting based on parts of column values**
263
264 The above column of chromosomes, with their constant prefix, could also have been sorted in natural order with the **Fast numeric sort** by **considering its characters from** character 4 onwards.
265
266 In general, sorting on just a range of characters within a column makes it possible to sort values with internal structure in a single tool run.
267
268 Consider, for example, the following column of dates, which is unfortunately not ISO-8601 formatted::
269
270 10/24/2025
271 09/18/1974
272 12/16/1998
273 03/04/2007
274
275 You could modify these values with other tools first, but you can achieve correct chronological sort order with a single run of the sort tool like this:
276
277 - Do a **Fast numeric sort** on the column **considering its characters from** character 7 (the start of the year).
278 - Resolve ties (using another column selection section) with another **Fast numeric sort** on the same column **considering its characters from** character 1 **to and including** character 2 (the month representation).
279 - Resolve remaining ties with a third **Fast numeric sort**, again on the same column, **considering its characters from** character 4 **to and including** character 5 (the day representation).
280
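281 Applied to a column that additionally contains the dates 11/17/1998 and 11/18/1998 (to illustrate tie-breaking), this will result in the ascending chronological order::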
282
283 09/18/1974
284 11/17/1998
285 11/18/1998
286 12/16/1998
287 03/04/2007
288 10/24/2025
289
290 Before relying on in-column character ranges, make extra sure that all values are formatted consistently (in the above example, that all dates use two digits for days and months and the same overall date format).
174 ]]> 291 ]]>
175 </help> 292 </help>
176 <expand macro="citations" /> 293 <expand macro="citations" />
177 </tool> 294 </tool>