annotate query.py @ 0:0369de831b32 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
author recetox
date Wed, 09 Dec 2020 02:04:20 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
1 import json
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
2 from typing import Tuple
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
3
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
4 import click
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
5 import pandas
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
6 import pandasql
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
7 from pandas import DataFrame
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
8
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
9
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
def read(path: str, filetype: str, name: str) -> Tuple[str, DataFrame]:
    """Load a single table from disk and pair it with its name.

    Args:
        path: Location of the file to load.
        filetype: Format of the file. One of ``csv``, ``tsv``/``tabular``,
            ``h5``/``hdf``, ``feather``, ``parquet`` or ``sqlite``.
        name: Name under which the table is returned. For HDF files it is
            also the key read from the store, and for SQLite files it is
            passed to ``pandas.read_sql`` as the table to load.

    Returns:
        A ``(name, frame)`` tuple, so a sequence of results can be fed
        directly to ``dict``.

    Raises:
        NotImplementedError: If ``filetype`` is not one of the formats above.
    """
    if filetype == 'csv':
        return name, pandas.read_csv(path)
    elif filetype in ('tsv', 'tabular'):
        return name, pandas.read_table(path)
    elif filetype in ('h5', 'hdf'):
        return name, pandas.read_hdf(path, name)
    elif filetype == 'feather':
        return name, pandas.read_feather(path)
    elif filetype == 'parquet':
        return name, pandas.read_parquet(path)
    elif filetype == 'sqlite':
        # Return the (name, frame) pair like every other branch; returning
        # the bare frame broke callers that build a dict from the results.
        return name, pandas.read_sql(name, f'sqlite:///{path}')
    else:
        raise NotImplementedError(f'Unknown filetype {filetype}')
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
25
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
26
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
def write(df: DataFrame, path: str, filetype: str, name: str) -> None:
    """Store a data frame at ``path`` in the requested format.

    Args:
        df: Frame to store.
        path: Destination file.
        filetype: Output format. One of ``csv``, ``tsv``/``tabular``,
            ``h5``/``hdf``, ``feather``, ``parquet`` or ``sqlite``.
        name: Key (HDF) or table name (SQLite) to store the frame under;
            not used by the other formats.

    Raises:
        NotImplementedError: If ``filetype`` is not one of the formats above.
    """
    if filetype == 'csv':
        df.to_csv(path)
        return

    if filetype in ('tsv', 'tabular'):
        df.to_csv(path, sep='\t')
        return

    if filetype in ('h5', 'hdf'):
        # Every column is registered as a data column of the appended table.
        column_names = list(df.columns)
        with pandas.HDFStore(path) as store:
            store.append(name, df, data_columns=column_names)
        return

    if filetype == 'feather':
        df.to_feather(path)
        return

    if filetype == 'parquet':
        df.to_parquet(path)
        return

    if filetype == 'sqlite':
        df.to_sql(name, f'sqlite:///{path}')
        return

    raise NotImplementedError(f'Unknown filetype {filetype}')
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
43
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
44
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
@click.command()
@click.argument('config', type=click.File())
def main(config) -> None:
    """Run the configured SQL query over the input tables and save the result.

    ``config`` is an open JSON file. The code reads these keys from it:
    ``tables`` (a list of objects with ``path``, ``format`` and ``name``),
    ``query`` (passed to ``pandasql.sqldf``) and ``result`` (an object with
    ``path``, ``format`` and ``name`` describing the output).
    """
    settings = json.load(config)

    # Map each table name to its loaded frame; this mapping is the
    # environment handed to pandasql.
    tables = {}
    for spec in settings['tables']:
        table_name, frame = read(spec['path'], spec['format'], spec['name'])
        tables[table_name] = frame

    result = pandasql.sqldf(settings['query'], tables)

    output = settings['result']
    write(result, output['path'], output['format'], output['name'])
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
53
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
54
0369de831b32 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff changeset
# Script entry point: main() is a click command, so it is called without
# arguments here.
if __name__ == "__main__":
    main()