Mercurial > repos > iuc > sqlite_to_tabular
comparison macros.xml @ 0:c866ec050312 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d
| author | iuc |
|---|---|
| date | Tue, 18 Jul 2017 09:06:16 -0400 |
| parents | |
| children | 89d898454f1e |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c866ec050312 |
|---|---|
| 1 <macros> | |
| 2 <token name="@LINEFILTERS@"> | |
| 3 <![CDATA[ | |
| 4 ## build $input_filters: one dict per entry of $linefilters (comment chars become \xNN regex escapes so + * ? . ^ $ | match literally) | |
| 5 #set $input_filters = [] | |
| 6 #for $fi in $linefilters: | |
| 7 #if $fi.filter.filter_type == 'skip': | |
| 8 #set $skip_lines = None | |
| 9 #if str($fi.filter.skip_lines) != '': | |
| 10 #set $skip_lines = int($fi.filter.skip_lines) | |
| 11 #elif $tbl.table.metadata.comment_lines and $tbl.table.metadata.comment_lines > 0: | |
| 12 #set $skip_lines = int($tbl.table.metadata.comment_lines) | |
| 13 #end if | |
| 14 #if $skip_lines is not None: | |
| 15 #set $filter_dict = dict() | |
| 16 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
| 17 #set $filter_dict['count'] = $skip_lines | |
| 18 #silent $input_filters.append($filter_dict) | |
| 19 #end if | |
| 20 #elif $fi.filter.filter_type == 'comment': | |
| 21 #set $filter_dict = dict() | |
| 22 #set $filter_dict['filter'] = 'regex' | |
| 23 #set $filter_dict['pattern'] = '^(%s).*$' % '|'.join(['\\x%02x' % int(x) for x in (str($fi.filter.comment_char)).split(',')]) | |
| 24 #set $filter_dict['action'] = 'exclude_match' | |
| 25 #silent $input_filters.append($filter_dict) | |
| 26 #elif $fi.filter.filter_type == 'regex': | |
| 27 #set $filter_dict = dict() | |
| 28 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
| 29 #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) | |
| 30 #set $filter_dict['action'] = str($fi.filter.regex_action) | |
| 31 #silent $input_filters.append($filter_dict) | |
| 32 #elif $fi.filter.filter_type == 'select_columns': | |
| 33 #set $filter_dict = dict() | |
| 34 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
| 35 #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')] | |
| 36 #silent $input_filters.append($filter_dict) | |
| 37 #elif $fi.filter.filter_type == 'replace': | |
| 38 #set $filter_dict = dict() | |
| 39 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
| 40 #set $filter_dict['column'] = int(str($fi.filter.column).replace('c','')) | |
| 41 #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) | |
| 42 #set $filter_dict['replace'] = str($fi.filter.regex_replace) | |
| 43 #silent $input_filters.append($filter_dict) | |
| 44 #elif str($fi.filter.filter_type).endswith('pend_line_num'): | |
| 45 #set $filter_dict = dict() | |
| 46 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
| 47 #silent $input_filters.append($filter_dict) | |
| 48 #elif str($fi.filter.filter_type).endswith('pend_text'): | |
| 49 #set $filter_dict = dict() | |
| 50 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
| 51 #set $filter_dict['column_text'] = str($fi.filter.column_text) | |
| 52 #silent $input_filters.append($filter_dict) | |
| 53 #elif $fi.filter.filter_type == 'normalize': | |
| 54 #set $filter_dict = dict() | |
| 55 #set $filter_dict['filter'] = str($fi.filter.filter_type) | |
| 56 #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')] | |
| 57 #set $filter_dict['separator'] = str($fi.filter.separator) | |
| 58 #silent $input_filters.append($filter_dict) | |
| 59 #end if | |
| 60 #end for | |
| 61 ]]> | |
| 62 </token> | |
| 63 <xml name="macro_line_filters"> <!-- repeatable line-filter inputs; comment_char option values are decimal character codes and labels must be XML-escaped --> | |
| 64 <repeat name="linefilters" title="Filter Tabular Input Lines"> | |
| 65 <conditional name="filter"> | |
| 66 <param name="filter_type" type="select" label="Filter By"> | |
| 67 <option value="skip">skip leading lines</option> | |
| 68 <option value="comment">comment char</option> | |
| 69 <option value="regex">by regex expression matching</option> | |
| 70 <option value="select_columns">select columns</option> | |
| 71 <option value="replace">regex replace value in column</option> | |
| 72 <option value="prepend_line_num">prepend a line number column</option> | |
| 73 <option value="append_line_num">append a line number column</option> | |
| 74 <option value="prepend_text">prepend a column with the given text</option> | |
| 75 <option value="append_text">append a column with the given text</option> | |
| 76 <option value="normalize">normalize list columns, replicates row for each item in list</option> | |
| 77 </param> | |
| 78 <when value="skip"> | |
| 79 <param name="skip_lines" type="integer" value="" min="0" optional="true" label="Skip lines" | |
| 80 help="Leave blank to use the comment lines metadata for this dataset" /> | |
| 81 </when> | |
| 82 <when value="comment"> | |
| 83 <param name="comment_char" type="select" display="checkboxes" multiple="True" label="Ignore lines beginning with these characters" help="lines beginning with these are skipped"> | |
| 84 <option value="62">&gt;</option> | |
| 85 <option value="64">@</option> | |
| 86 <option value="43">+</option> | |
| 87 <option value="60">&lt;</option> | |
| 88 <option value="42">*</option> | |
| 89 <option value="45">-</option> | |
| 90 <option value="61">=</option> | |
| 91 <option value="124">|</option> | |
| 92 <option value="63">?</option> | |
| 93 <option value="36">$</option> | |
| 94 <option value="46">.</option> | |
| 95 <option value="58">:</option> | |
| 96 <option value="38">&amp;</option> | |
| 97 <option value="37">%</option> | |
| 98 <option value="94">^</option> | |
| 99 <option value="35">#</option> | |
| 100 <option value="33">!</option> | |
| 101 </param> | |
| 102 </when> | |
| 103 <when value="prepend_line_num"/> | |
| 104 <when value="append_line_num"/> | |
| 105 <when value="prepend_text"> | |
| 106 <param name="column_text" type="text" value="" label="text for column"> | |
| 107 </param> | |
| 108 </when> | |
| 109 <when value="append_text"> | |
| 110 <param name="column_text" type="text" value="" label="text for column"> | |
| 111 </param> | |
| 112 </when> | |
| 113 <when value="regex"> | |
| 114 <param name="regex_pattern" type="text" value="" label="regex pattern"> | |
| 115 <sanitizer sanitize="False"/> | |
| 116 </param> | |
| 117 <param name="regex_action" type="select" label="action for regex match"> | |
| 118 <option value="exclude_match">exclude line on pattern match</option> | |
| 119 <option value="include_match">include line on pattern match</option> | |
| 120 <option value="exclude_find">exclude line if pattern found</option> | |
| 121 <option value="include_find">include line if pattern found</option> | |
| 122 </param> | |
| 123 </when> | |
| 124 <when value="select_columns"> | |
| 125 <param name="columns" type="text" value="" label="enter column numbers to keep" | |
| 126 help="example: 1,4,2 or c1,c4,c2(selects the first,fourth, and second columns)"> | |
| 127 <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator> | |
| 128 </param> | |
| 129 </when> | |
| 130 <when value="replace"> | |
| 131 <param name="column" type="text" value="" label="enter column number to replace" | |
| 132 help="example: 1 or c1 (selects the first column)"> | |
| 133 <validator type="regex" message="A single column ordinal position (e.g. 1 or c1)">^(c?[1-9]\d*)$</validator> | |
| 134 </param> | |
| 135 <param name="regex_pattern" type="text" value="" label="regex pattern"> | |
| 136 <sanitizer sanitize="False"/> | |
| 137 </param> | |
| 138 <param name="regex_replace" type="text" value="" label="replacement expression"> | |
| 139 <sanitizer sanitize="False"/> | |
| 140 </param> | |
| 141 </when> | |
| 142 <when value="normalize"> | |
| 143 <param name="columns" type="text" value="" label="enter column numbers to normalize"> | |
| 144 <help><![CDATA[ | |
| 145 example: 2,4 or c2,c4 (selects the second, and fourth columns) | |
| 146 If multiple columns are selected, they should have the same length and separator on each line | |
| 147 ]]></help> | |
| 148 <validator type="regex" message="Column ordinal positions separated by commas">^(c?[1-9]\d*)(,c?[1-9]\d*)*$</validator> | |
| 149 </param> | |
| 150 <param name="separator" type="text" value="," label="List item delimiter in column"> | |
| 151 <sanitizer sanitize="False"/> | |
| 152 <validator type="regex" message="Anything but TAB or Newline">^[^\t\n\r\f\v]+$</validator> | |
| 153 </param> | |
| 154 </when> | |
| 155 </conditional> | |
| 156 </repeat> | |
| 157 </xml> | |
| 158 | |
| 159 <token name="@LINEFILTERS_HELP@"> | |
| 160 <![CDATA[ | |
| 161 **Input Line Filters** | |
| 162 | |
| 163 As a tabular file is being read, line filters may be applied. | |
| 164 | |
| 165 :: | |
| 166 | |
| 167 - skip leading lines skip the first *number* of lines | |
| 168 - comment char omit any lines that start with the specified comment character | |
| 169 - by regex expression matching *include/exclude* lines that match the regex expression | |
| 170 - select columns choose to include only selected columns in the order specified | |
| 171 - regex replace value in column replace a field in a column using a regex substitution (good for date reformatting) | |
| 172 - prepend a line number column each line has the ordinal value of the line read by this filter as the first column | |
| 173 - append a line number column each line has the ordinal value of the line read by this filter as the last column | |
| 174 - prepend a text column each line has the text string as the first column | |
| 175 - append a text column each line has the text string as the last column | |
| 176 - normalize list columns replicates the line for each item in the specified list *columns* | |
| 177 ]]> | |
| 178 </token> | |
| 179 | |
| 180 <token name="@LINEFILTERS_HELP_EXAMPLE@"> | |
| 181 <![CDATA[ | |
| 182 **Line Filtering Example** | |
| 183 *(Six filters are applied as the following file is read)* | |
| 184 | |
| 185 :: | |
| 186 | |
| 187 Input Tabular File: | |
| 188 | |
| 189 #People with pets | |
| 190 Pets FirstName LastName DOB PetNames PetType | |
| 191 2 Paula Brown 24/05/78 Rex,Fluff dog,cat | |
| 192 1 Steven Jones 04/04/74 Allie cat | |
| 193 0 Jane Doe 24/05/78 | |
| 194 1 James Smith 20/10/80 Spot | |
| 195 | |
| 196 | |
| 197 Filter 1 - append a line number column: | |
| 198 | |
| 199 #People with pets 1 | |
| 200 Pets FirstName LastName DOB PetNames PetType 2 | |
| 201 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3 | |
| 202 1 Steven Jones 04/04/74 Allie cat 4 | |
| 203 0 Jane Doe 24/05/78 5 | |
| 204 1 James Smith 20/10/80 Spot 6 | |
| 205 | |
| 206 Filter 2 - by regex expression matching [include]: '^\d+' (include lines that start with a number) | |
| 207 | |
| 208 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3 | |
| 209 1 Steven Jones 04/04/74 Allie cat 4 | |
| 210 0 Jane Doe 24/05/78 5 | |
| 211 1 James Smith 20/10/80 Spot 6 | |
| 212 | |
| 213 Filter 3 - append a line number column: | |
| 214 | |
| 215 2 Paula Brown 24/05/78 Rex,Fluff dog,cat 3 1 | |
| 216 1 Steven Jones 04/04/74 Allie cat 4 2 | |
| 217 0 Jane Doe 24/05/78 5 3 | |
| 218 1 James Smith 20/10/80 Spot 6 4 | |
| 219 | |
| 220 Filter 4 - regex replace value in column[4]: '(\d+)/(\d+)/(\d+)' '19\3-\2-\1' (convert dates to sqlite format) | |
| 221 | |
| 222 2 Paula Brown 1978-05-24 Rex,Fluff dog,cat 3 1 | |
| 223 1 Steven Jones 1974-04-04 Allie cat 4 2 | |
| 224 0 Jane Doe 1978-05-24 5 3 | |
| 225 1 James Smith 1980-10-20 Spot 6 4 | |
| 226 | |
| 227 Filter 5 - normalize list columns[5,6]: | |
| 228 | |
| 229 2 Paula Brown 1978-05-24 Rex dog 3 1 | |
| 230 2 Paula Brown 1978-05-24 Fluff cat 3 1 | |
| 231 1 Steven Jones 1974-04-04 Allie cat 4 2 | |
| 232 0 Jane Doe 1978-05-24 5 3 | |
| 233 1 James Smith 1980-10-20 Spot 6 4 | |
| 234 | |
| 235 Filter 6 - append a line number column: | |
| 236 | |
| 237 2 Paula Brown 1978-05-24 Rex dog 3 1 1 | |
| 238 2 Paula Brown 1978-05-24 Fluff cat 3 1 2 | |
| 239 1 Steven Jones 1974-04-04 Allie cat 4 2 3 | |
| 240 0 Jane Doe 1978-05-24 5 3 4 | |
| 241 1 James Smith 1980-10-20 Spot 6 4 5 | |
| 242 | |
| 243 ]]> | |
| 244 </token> | |
| 245 | |
| 246 <token name="@QUERY_HELP@"> | |
| 247 <![CDATA[ | |
| 248 | |
| 249 For help in using SQLite_ see: http://www.sqlite.org/docs.html | |
| 250 | |
| 251 **NOTE:** values in SQLite date fields must be in the format *YYYY-MM-DD*, for example: 2015-09-30 | |
| 252 | |
| 253 See: http://www.sqlite.org/lang_datefunc.html | |
| 254 | |
| 255 **Example** | |
| 256 | |
| 257 Given 2 tabular datasets: *customers* and *sales* | |
| 258 | |
| 259 Dataset *customers* | |
| 260 | |
| 261 Table name: "customers" | |
| 262 | |
| 263 Column names: "CustomerID,FirstName,LastName,Email,DOB,Phone" | |
| 264 | |
| 265 =========== ========== ========== ===================== ========== ============ | |
| 266 #CustomerID FirstName LastName Email DOB Phone | |
| 267 =========== ========== ========== ===================== ========== ============ | |
| 268 1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222 | |
| 269 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545 | |
| 270 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232 | |
| 271 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888 | |
| 272 =========== ========== ========== ===================== ========== ============ | |
| 273 | |
| 274 Dataset *sales* | |
| 275 | |
| 276 Table name: "sales" | |
| 277 | |
| 278 Column names: "CustomerID,Date,SaleAmount" | |
| 279 | |
| 280 ============= ============ ============ | |
| 281 #CustomerID Date SaleAmount | |
| 282 ============= ============ ============ | |
| 283 2 2004-05-06 100.22 | |
| 284 1 2004-05-07 99.95 | |
| 285 3 2004-05-07 122.95 | |
| 286 3 2004-05-13 100.00 | |
| 287 4 2004-05-22 555.55 | |
| 288 ============= ============ ============ | |
| 289 | |
| 290 The query | |
| 291 | |
| 292 :: | |
| 293 | |
| 294 SELECT FirstName,LastName,sum(SaleAmount) as "TotalSales" | |
| 295 FROM customers join sales on customers.CustomerID = sales.CustomerID | |
| 296 GROUP BY customers.CustomerID ORDER BY TotalSales DESC; | |
| 297 | |
| 298 Produces this tabular output: | |
| 299 | |
| 300 ========== ======== ========== | |
| 301 #FirstName LastName TotalSales | |
| 302 ========== ======== ========== | |
| 303 James Smith 555.55 | |
| 304 Paula Brown 222.95 | |
| 305 Steven Goldfish 100.22 | |
| 306 John Smith 99.95 | |
| 307 ========== ======== ========== | |
| 308 | |
| 309 | |
| 310 If the optional Table name and Column names inputs are not used, the query would be: | |
| 311 | |
| 312 :: | |
| 313 | |
| 314 SELECT t1.c2 as "FirstName", t1.c3 as "LastName", sum(t2.c3) as "TotalSales" | |
| 315 FROM t1 join t2 on t1.c1 = t2.c1 | |
| 316 GROUP BY t1.c1 ORDER BY TotalSales DESC; | |
| 317 | |
| 318 You can selectively name columns, e.g. on the customers input you could just name columns 2,3, and 5: | |
| 319 | |
| 320 Column names: ,FirstName,LastName,,BirthDate | |
| 321 | |
| 322 Results in the following database table | |
| 323 | |
| 324 =========== ========== ========== ===================== ========== ============ | |
| 325 #c1 FirstName LastName c4 BirthDate c6 | |
| 326 =========== ========== ========== ===================== ========== ============ | |
| 327 1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222 | |
| 328 2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545 | |
| 329 3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232 | |
| 330 4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888 | |
| 331 =========== ========== ========== ===================== ========== ============ | |
| 332 | |
| 333 | |
| 334 Regular_expression_ functions are included for: | |
| 335 | |
| 336 :: | |
| 337 | |
| 338 matching: re_match('pattern',column) | |
| 339 | |
| 340 SELECT t1.FirstName, t1.LastName | |
| 341 FROM t1 | |
| 342 WHERE re_match('^.*\.(net|org)$',c4) | |
| 343 | |
| 344 Results: | |
| 345 | |
| 346 =========== ========== | |
| 347 #FirstName LastName | |
| 348 =========== ========== | |
| 349 Steven Goldfish | |
| 350 Paula Brown | |
| 351 =========== ========== | |
| 352 | |
| 353 | |
| 354 :: | |
| 355 | |
| 356 searching: re_search('pattern',column) | |
| 357 substituting: re_sub('pattern','replacement',column) | |
| 358 | |
| 359 SELECT t1.FirstName, t1.LastName, re_sub('^\d{2}(\d{2})-(\d\d)-(\d\d)','\3/\2/\1',BirthDate) as "DOB" | |
| 360 FROM t1 | |
| 361 WHERE re_search('[hp]er',c4) | |
| 362 | |
| 363 Results: | |
| 364 | |
| 365 | |
| 366 =========== ========== ========== | |
| 367 #FirstName LastName DOB | |
| 368 =========== ========== ========== | |
| 369 Steven Goldfish 04/04/74 | |
| 370 Paula Brown 24/05/78 | |
| 371 James Smith 20/10/80 | |
| 372 =========== ========== ========== | |
| 373 | |
| 374 .. _Regular_expression: https://docs.python.org/release/2.7/library/re.html | |
| 375 .. _SQLite: http://www.sqlite.org/index.html | |
| 376 .. _SQLite_functions: http://www.sqlite.org/docs.html | |
| 377 | |
| 378 | |
| 379 ]]> | |
| 380 </token> | |
| 381 | |
| 382 </macros> | |
| 383 |
