Mercurial > repos > jjohnson > query_tabular
comparison query_tabular.py @ 8:6d9c91071884 draft
planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/query_tabular commit 60d1a49c09f87c1c1ec6fecbe54aa226bdc695a7-dirty
| author | jjohnson |
|---|---|
| date | Fri, 15 Apr 2016 16:29:51 -0400 |
| parents | aa2409ae9dc0 |
| children | 98bd1e29d669 |
comparison
equal
deleted
inserted
replaced
| 7:aa2409ae9dc0 | 8:6d9c91071884 |
|---|---|
| 12 """ | 12 """ |
| 13 TODO: | 13 TODO: |
| 14 - could read column names from comment lines, but issues with legal names | 14 - could read column names from comment lines, but issues with legal names |
| 15 - could add some transformations on tabular columns, | 15 - could add some transformations on tabular columns, |
| 16 e.g. a regex to format date/time strings | 16 e.g. a regex to format date/time strings |
| 17 c2 : re.sub('pat', 'sub', c2) | 17 index: ['c2','c4,c5'] |
| 18 c3 : | 18 unique: ['c1'] |
| 19 format: { | |
| 20 c2 : re.sub('pat', 'sub', c2) | |
| 21 c3 : len(c3) | |
| 22 } | |
| 23 def format(colname,val, expr): | |
| 24 | |
| 25 - allow optional autoincrement id column - user supplied name? | |
| 26 autoincrement : 'id' | |
| 19 - column_defs dict of columns to create from tabular input | 27 - column_defs dict of columns to create from tabular input |
| 20 column_defs : { 'name1' : 'expr', 'name2' : 'expr'} | 28 column_defs : { 'name1' : 'expr', 'name2' : 'expr'} |
| 21 - allow multiple queries and outputs | 29 - allow multiple queries and outputs |
| 22 - add a --json input for table definitions (or yaml) | 30 - add a --json input for table definitions (or yaml) |
| 23 JSON config: | 31 JSON config: |
| 109 for i, col_name in enumerate(col_names): | 117 for i, col_name in enumerate(col_names): |
| 110 col_def.append('%s %s' % (col_names[i], col_types[i])) | 118 col_def.append('%s %s' % (col_names[i], col_types[i])) |
| 111 return col_names, col_types, col_def, col_idx | 119 return col_names, col_types, col_def, col_idx |
| 112 | 120 |
| 113 | 121 |
| 114 def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False): | 122 def create_table(conn, file_path, table_name, skip=0, comment_char='#', column_names=None,load_named_columns=False,unique_indexes=[],indexes=[]): |
| 115 col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) | 123 col_names, col_types, col_def, col_idx = get_column_def(file_path, table_name, skip=skip, comment_char=comment_char, column_names=column_names,load_named_columns=load_named_columns) |
| 116 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] | 124 col_func = [float if t == 'REAL' else int if t == 'INTEGER' else str for t in col_types] |
| 117 table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) | 125 table_def = 'CREATE TABLE %s (\n %s\n);' % (table_name, ', \n '.join(col_def)) |
| 118 # print >> sys.stdout, table_def | 126 # print >> sys.stdout, table_def |
| 119 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names])) | 127 insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % (table_name, ','.join(col_names), ','.join(["?" for x in col_names])) |
| 120 # print >> sys.stdout, insert_stmt | 128 # print >> sys.stdout, insert_stmt |
| 121 data_lines = 0 | 129 data_lines = 0 |
| 122 try: | 130 try: |
| 123 c = conn.cursor() | 131 c = conn.cursor() |
| 124 c.execute(table_def) | 132 c.execute(table_def) |
| 133 conn.commit() | |
| 134 c.close() | |
| 135 for i,index in enumerate(unique_indexes): | |
| 136 index_name='idx_uniq_%s_%d' % (table_name,i) | |
| 137 index_columns = index.split(',') | |
| 138 create_index(conn, table_name, index_name, index_columns,unique=True) | |
| 139 for i,index in enumerate(indexes): | |
| 140 index_name='idx_%s_%d' % (table_name,i) | |
| 141 index_columns = index.split(',') | |
| 142 create_index(conn, table_name, index_name, index_columns) | |
| 143 c = conn.cursor() | |
| 125 with open(file_path, "r") as fh: | 144 with open(file_path, "r") as fh: |
| 126 for linenum, line in enumerate(fh): | 145 for linenum, line in enumerate(fh): |
| 127 if linenum < skip or line.startswith(comment_char): | 146 if linenum < skip or line.startswith(comment_char): |
| 128 continue | 147 continue |
| 129 data_lines += 1 | 148 data_lines += 1 |
| 139 c.close() | 158 c.close() |
| 140 except Exception, e: | 159 except Exception, e: |
| 141 print >> sys.stderr, 'Failed: %s' % (e) | 160 print >> sys.stderr, 'Failed: %s' % (e) |
| 142 exit(1) | 161 exit(1) |
| 143 | 162 |
| 163 def create_index(conn, table_name, index_name, index_columns, unique=False): | |
| 164 index_def = "CREATE %s INDEX %s on %s(%s)" % ('UNIQUE' if unique else '', index_name, table_name, ','.join(index_columns)) | |
| 165 c = conn.cursor() | |
| 166 c.execute(index_def) | |
| 167 conn.commit() | |
| 168 c.close() | |
| 144 | 169 |
| 145 def regex_match(expr, item): | 170 def regex_match(expr, item): |
| 146 return re.match(expr, item) is not None | 171 return re.match(expr, item) is not None |
| 147 | 172 |
| 148 | 173 |
| 214 column_names = table['column_names'] if 'column_names' in table else None | 239 column_names = table['column_names'] if 'column_names' in table else None |
| 215 if column_names: | 240 if column_names: |
| 216 load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False | 241 load_named_columns = table['load_named_columns'] if 'load_named_columns' in table else False |
| 217 else: | 242 else: |
| 218 load_named_columns = False | 243 load_named_columns = False |
| 219 create_table(conn, path, table_name, column_names=column_names, skip=comment_lines,load_named_columns=load_named_columns) | 244 unique_indexes = table['unique'] if 'unique' in table else [] |
| 245 indexes = table['index'] if 'index' in table else [] | |
| 246 create_table(conn, path, table_name, column_names=column_names, | |
| 247 skip=comment_lines, load_named_columns=load_named_columns, | |
| 248 unique_indexes=unique_indexes, indexes=indexes) | |
| 220 except Exception, exc: | 249 except Exception, exc: |
| 221 print >> sys.stderr, "Error: %s" % exc | 250 print >> sys.stderr, "Error: %s" % exc |
| 222 conn.close() | 251 conn.close() |
| 223 | 252 |
| 224 query = None | 253 query = None |
