comparison query.py @ 0:0369de831b32 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
author recetox
date Wed, 09 Dec 2020 02:04:20 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0369de831b32
1 import json
2 from typing import Tuple
3
4 import click
5 import pandas
6 import pandasql
7 from pandas import DataFrame
8
9
def read(path: str, filetype: str, name: str) -> Tuple[str, DataFrame]:
    """Load one input table and pair it with the name it is registered under.

    Args:
        path: Location of the file to load.
        filetype: Format identifier. One of ``'csv'``, ``'tsv'``/``'tabular'``,
            ``'h5'``/``'hdf'``, ``'feather'``, ``'parquet'`` or ``'sqlite'``.
        name: Name returned alongside the data. It is also used as the key
            when reading HDF files and as the table name when reading SQLite
            databases.

    Returns:
        A ``(name, DataFrame)`` tuple, for every supported format.

    Raises:
        NotImplementedError: If ``filetype`` is not one of the supported
            identifiers.
    """
    if filetype == 'csv':
        frame = pandas.read_csv(path)
    elif filetype in ('tsv', 'tabular'):
        frame = pandas.read_table(path)
    elif filetype in ('h5', 'hdf'):
        frame = pandas.read_hdf(path, name)
    elif filetype == 'feather':
        frame = pandas.read_feather(path)
    elif filetype == 'parquet':
        frame = pandas.read_parquet(path)
    elif filetype == 'sqlite':
        # This branch used to return the bare DataFrame, which broke the
        # (name, DataFrame) contract the caller relies on when it builds a
        # dict from the results.
        frame = pandas.read_sql(name, f'sqlite:///{path}')
    else:
        raise NotImplementedError(f'Unknown filetype {filetype}')

    # Single exit point so every format yields the same (name, frame) shape.
    return name, frame
25
26
def write(df: DataFrame, path: str, filetype: str, name: str) -> None:
    """Store a DataFrame at ``path`` in the requested format.

    Args:
        df: Data to store.
        path: Destination file.
        filetype: Format identifier. One of ``'csv'``, ``'tsv'``/``'tabular'``,
            ``'h5'``/``'hdf'``, ``'feather'``, ``'parquet'`` or ``'sqlite'``.
        name: Key used inside an HDF store, or the table name inside a SQLite
            database. It is not used by the other formats.

    Raises:
        NotImplementedError: If ``filetype`` is not one of the supported
            identifiers.
    """
    if filetype == 'csv':
        df.to_csv(path)
        return

    if filetype in ('tsv', 'tabular'):
        df.to_csv(path, sep='\t')
        return

    if filetype in ('h5', 'hdf'):
        # Every column is passed as a data column when appending to the store.
        column_names = list(df.columns)
        with pandas.HDFStore(path) as store:
            store.append(name, df, data_columns=column_names)
        return

    if filetype == 'feather':
        df.to_feather(path)
        return

    if filetype == 'parquet':
        df.to_parquet(path)
        return

    if filetype == 'sqlite':
        df.to_sql(name, f'sqlite:///{path}')
        return

    raise NotImplementedError(f'Unknown filetype {filetype}')
43
44
# Command-line entry point: load a JSON job description, read every listed
# table, run the SQL query over them and store the result.
#
# Keys read from the JSON document:
#   tables - list of entries, each with 'path', 'format' and 'name'
#   query  - the query text handed to pandasql
#   result - entry with 'path', 'format' and 'name' for the output
#
# Documentation is kept in comments rather than a docstring because click uses
# a command's docstring as its --help text.
@click.command()
@click.argument('config', type=click.File())
def main(config) -> None:
    settings = json.load(config)

    # Each read() call yields a (name, DataFrame) pair; dict() turns the pairs
    # into the name -> table mapping that the query runs against.
    tables = dict(
        read(entry['path'], entry['format'], entry['name'])
        for entry in settings['tables']
    )

    result = pandasql.sqldf(settings['query'], tables)

    # The output entry is looked up only after the query has run.
    output = settings['result']
    write(result, output['path'], output['format'], output['name'])


if __name__ == '__main__':
    main()