Mercurial > repos > jjohnson > query_tabular
comparison query_tabular.py @ 10:98bd1e29d669 draft default tip
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 29288f94a382686e263623cf6ddcd235ed5f2310-dirty
| author | jjohnson |
|---|---|
| date | Wed, 20 Apr 2016 15:44:33 -0400 |
| parents | 6d9c91071884 |
| children |
comparison
equal
deleted
inserted
replaced
| 9:b7f149b4792f | 10:98bd1e29d669 |
|---|---|
| 12 """ | 12 """ |
| 13 TODO: | 13 TODO: |
| 14 - could read column names from comment lines, but issues with legal names | 14 - could read column names from comment lines, but issues with legal names |
| 15 - could add some transformations on tabular columns, | 15 - could add some transformations on tabular columns, |
| 16 e.g. a regex to format date/time strings | 16 e.g. a regex to format date/time strings |
| 17 index: ['c2','c4,c5'] | |
| 18 unique: ['c1'] | |
| 19 format: { | 17 format: { |
| 20 c2 : re.sub('pat', 'sub', c2) | 18 c2 : re.sub('pat', 'sub', c2) |
| 21 c3 : len(c3) | 19 c3 : len(c3) |
| 22 } | 20 } |
| 23 def format(colname,val, expr): | 21 def format(colname,val, expr): |
| 24 | |
| 25 - allow optional autoincrement id column - user supplied name? | |
| 26 autoincrement : 'id' | |
| 27 - column_defs dict of columns to create from tabular input | 22 - column_defs dict of columns to create from tabular input |
| 28 column_defs : { 'name1' : 'expr', 'name2' : 'expr'} | 23 column_defs : { 'name1' : 'expr', 'name2' : 'expr'} |
| 29 - allow multiple queries and outputs | 24 - allow multiple queries and outputs |
| 30 - add a --json input for table definitions (or yaml) | 25 - add a --json input for table definitions (or yaml) |
| 31 JSON config: | 26 JSON config: |
| 32 { tables : [ | 27 { tables : [ |
| 33 { file_path : '/home/galaxy/dataset_101.dat', | 28 { file_path : '/home/galaxy/dataset_101.dat', |
| 34 table_name : 't1', | 29 table_name : 't1', |
| 35 column_names : ['c1', 'c2', 'c3'], | 30 column_names : ['c1', 'c2', 'c3'], |
| 31 pkey_autoincr : 'id' | |
| 36 comment_lines : 1 | 32 comment_lines : 1 |
| 33 unique: ['c1'], | |
| 34 index: ['c2','c3'] | |
| 37 }, | 35 }, |
| 38 { file_path : '/home/galaxy/dataset_102.dat', | 36 { file_path : '/home/galaxy/dataset_102.dat', |
| 39 table_name : 'gff', | 37 table_name : 'gff', |
| 40 column_names : ['seqname',,,'start','end'] | 38 column_names : ['seqname',,,'start','end'] |
| 41 comment_lines : 1 | 39 comment_lines : 1 |
| 117 for i, col_name in enumerate(col_names): | 115 for i, col_name in enumerate(col_names): |
| 118 col_def.append('%s %s' % (col_names[i], col_types[i])) | 116 col_def.append('%s %s' % (col_names[i], col_types[i])) |
| 119 return col_names, col_types, col_def, col_idx | 117 return col_names, col_types, col_def, col_idx |
| 120 | 118 |
| 121 | 119 |
| 122 def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False,unique_indexes=[],indexes=[]): | 120 def create_table(conn, file_path, table_name, skip=0, comment_char='#', pkey_autoincr=None, column_names=None,load_named_columns=False,unique_indexes=[],indexes=[]): |
| 123 col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) | 121 col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) |
| 124 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] | 122 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] |
| 125 table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) | 123 table_def = 'CREATE TABLE %s (\n %s%s\n);' % ( |
| 124 table_name, | |
| 125 '%s INTEGER PRIMARY KEY AUTOINCREMENT,' % pkey_autoincr if pkey_autoincr else '', | |
| 126 ', \n '.join(col_def)) | |
| 126 # print >> sys.stdout, table_def | 127 # print >> sys.stdout, table_def |
| 127 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names])) | 128 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names])) |
| 128 # print >> sys.stdout, insert_stmt | 129 # print >> sys.stdout, insert_stmt |
| 129 data_lines = 0 | 130 data_lines = 0 |
| 130 try: | 131 try: |
| 241 load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False | 242 load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False |
| 242 else: | 243 else: |
| 243 load_named_columns = False | 244 load_named_columns = False |
| 244 unique_indexes = table['unique'] if 'unique' in table else [] | 245 unique_indexes = table['unique'] if 'unique' in table else [] |
| 245 indexes = table['index'] if 'index' in table else [] | 246 indexes = table['index'] if 'index' in table else [] |
| 246 create_table(conn, path, table_name, column_names=column_names, | 247 pkey_autoincr = table['pkey_autoincr'] if 'pkey_autoincr' in table else None |
| 248 create_table(conn, path, table_name, pkey_autoincr=pkey_autoincr, column_names=column_names, | |
| 247 skip=comment_lines, load_named_columns=load_named_columns, | 249 skip=comment_lines, load_named_columns=load_named_columns, |
| 248 unique_indexes=unique_indexes, indexes=indexes) | 250 unique_indexes=unique_indexes, indexes=indexes) |
| 249 except Exception, exc: | 251 except Exception, exc: |
| 250 print >> sys.stderr, "Error: %s" % exc | 252 print >> sys.stderr, "Error: %s" % exc |
| 251 conn.close() | 253 conn.close() |
