Mercurial > repos > recetox > query
annotate query.py @ 0:0369de831b32 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
author | recetox |
---|---|
date | Wed, 09 Dec 2020 02:04:20 +0000 |
parents | |
children |
rev | line source |
---|---|
0
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
1 import json |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
2 from typing import Tuple |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
3 |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
4 import click |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
5 import pandas |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
6 import pandasql |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
7 from pandas import DataFrame |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
8 |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
9 |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
def read(path: str, filetype: str, name: str) -> Tuple[str, DataFrame]:
    """Load a single table from ``path`` and pair it with its name.

    Args:
        path: Location of the file to load.
        filetype: Format of the file; one of ``'csv'``, ``'tsv'``,
            ``'tabular'``, ``'h5'``, ``'hdf'``, ``'feather'``, ``'parquet'``
            or ``'sqlite'``.
        name: Name returned alongside the table. It is also passed to pandas
            as the key for HDF files and as the table name (or SQL query)
            for SQLite databases.

    Returns:
        A ``(name, dataframe)`` tuple, suitable for building a dict of tables.

    Raises:
        NotImplementedError: If ``filetype`` is not one of the supported
            formats.
    """
    if filetype == 'csv':
        table = pandas.read_csv(path)
    elif filetype in ('tsv', 'tabular'):
        table = pandas.read_table(path)
    elif filetype in ('h5', 'hdf'):
        table = pandas.read_hdf(path, name)
    elif filetype == 'feather':
        table = pandas.read_feather(path)
    elif filetype == 'parquet':
        table = pandas.read_parquet(path)
    elif filetype == 'sqlite':
        table = pandas.read_sql(name, f'sqlite:///{path}')
    else:
        raise NotImplementedError(f'Unknown filetype {filetype}')

    # Single exit point: every supported format yields the same
    # (name, table) pair. Previously the sqlite branch returned the bare
    # DataFrame, violating the declared return type.
    return name, table
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
25 |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
26 |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
def write(df: DataFrame, path: str, filetype: str, name: str) -> None:
    """Store ``df`` at ``path`` using the serialisation chosen by ``filetype``.

    Args:
        df: Table to store.
        path: Destination file.
        filetype: Output format; one of ``'csv'``, ``'tsv'``, ``'tabular'``,
            ``'h5'``, ``'hdf'``, ``'feather'``, ``'parquet'`` or ``'sqlite'``.
        name: Key under which the table is appended to an HDF store, or the
            table name inside a SQLite database. Not used by the other
            formats.

    Raises:
        NotImplementedError: If ``filetype`` is not one of the supported
            formats.
    """
    if filetype == 'csv':
        df.to_csv(path)
        return

    if filetype in ('tsv', 'tabular'):
        df.to_csv(path, sep='\t')
        return

    if filetype in ('h5', 'hdf'):
        # Every column is passed as a data column of the HDF store.
        column_names = list(df.columns)
        with pandas.HDFStore(path) as store:
            store.append(name, df, data_columns=column_names)
        return

    if filetype == 'feather':
        df.to_feather(path)
        return

    if filetype == 'parquet':
        df.to_parquet(path)
        return

    if filetype == 'sqlite':
        database_uri = f'sqlite:///{path}'
        df.to_sql(name, database_uri)
        return

    raise NotImplementedError(f'Unknown filetype {filetype}')
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
43 |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
44 |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
@click.command()
@click.argument('config', type=click.File())
def main(config) -> None:
    """Run the configured SQL query over the input tables and store the result.

    ``config`` is an open JSON file. The parsed document is read for:

    * ``tables``: a list of entries, each with ``path``, ``format`` and
      ``name``, loaded through ``read``;
    * ``query``: the query string handed to ``pandasql.sqldf`` together with
      the loaded tables;
    * ``result``: an entry with ``path``, ``format`` and ``name`` describing
      where ``write`` stores the query output.
    """
    settings = json.load(config)

    # Load every input table first, then index the frames by table name.
    loaded = [
        read(spec['path'], spec['format'], spec['name'])
        for spec in settings['tables']
    ]
    frames = dict(loaded)

    outcome = pandasql.sqldf(settings['query'], frames)

    target = settings['result']
    write(outcome, target['path'], target['format'], target['name'])
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
53 |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
54 |
0369de831b32
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
recetox
parents:
diff
changeset
|
# Invoke the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()