Mercurial > repos > recetox > query
diff query.py @ 0:0369de831b32 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
author | recetox |
---|---|
date | Wed, 09 Dec 2020 02:04:20 +0000 |
parents | |
children |
line wrap: on
line diff
# Reconstructed from the diff of b/query.py (new file: --- /dev/null, +++ b/query.py,
# Wed Dec 09 02:04:20 2020 +0000).
"""Run an SQL query over tables described in a JSON config and store the result.

The config file is a JSON object with three keys, as read by ``main``:

* ``tables`` -- list of objects with ``path``, ``format`` and ``name``;
* ``query`` -- the SQL text handed to ``pandasql.sqldf``;
* ``result`` -- object with ``path``, ``format`` and ``name`` for the output.
"""
import json
from typing import Tuple

import click
import pandas
import pandasql
from pandas import DataFrame


def read(path: str, filetype: str, name: str) -> Tuple[str, DataFrame]:
    """Load one table and return it paired with its name.

    Args:
        path: Location of the input file.
        filetype: One of ``csv``, ``tsv``/``tabular``, ``h5``/``hdf``,
            ``feather``, ``parquet`` or ``sqlite``.
        name: Name under which the table is returned. For HDF input it is
            also the key read from the store; for SQLite input it is also
            passed to ``pandas.read_sql`` as the table name / query.

    Returns:
        A ``(name, DataFrame)`` pair, suitable for building a dict of tables.

    Raises:
        NotImplementedError: If ``filetype`` is not one of the supported values.
    """
    if filetype == 'csv':
        return name, pandas.read_csv(path)
    elif filetype in ('tsv', 'tabular'):
        return name, pandas.read_table(path)
    elif filetype in ('h5', 'hdf'):
        return name, pandas.read_hdf(path, name)
    elif filetype == 'feather':
        return name, pandas.read_feather(path)
    elif filetype == 'parquet':
        return name, pandas.read_parquet(path)
    elif filetype == 'sqlite':
        # Must return the (name, frame) pair like every other branch; a bare
        # DataFrame here breaks the dict(...) construction in main().
        return name, pandas.read_sql(name, f'sqlite:///{path}')
    else:
        raise NotImplementedError(f'Unknown filetype {filetype}')


def write(df: DataFrame, path: str, filetype: str, name: str) -> None:
    """Store ``df`` at ``path`` in the requested format.

    Args:
        df: The table to store.
        path: Location of the output file.
        filetype: One of ``csv``, ``tsv``/``tabular``, ``h5``/``hdf``,
            ``feather``, ``parquet`` or ``sqlite``.
        name: Key (HDF) or table name (SQLite) to store the data under;
            not used by the other formats.

    Raises:
        NotImplementedError: If ``filetype`` is not one of the supported values.
    """
    # NOTE(review): the CSV/TSV/SQLite writers are called with pandas defaults,
    # so the DataFrame index is written as an extra column -- confirm that
    # this is intended for query results.
    if filetype == 'csv':
        df.to_csv(path)
    elif filetype in ('tsv', 'tabular'):
        df.to_csv(path, sep='\t')
    elif filetype in ('h5', 'hdf'):
        with pandas.HDFStore(path) as store:
            # Every column is declared as a data column of the appended table.
            store.append(name, df, data_columns=list(df.columns))
    elif filetype == 'feather':
        df.to_feather(path)
    elif filetype == 'parquet':
        df.to_parquet(path)
    elif filetype == 'sqlite':
        df.to_sql(name, f'sqlite:///{path}')
    else:
        raise NotImplementedError(f'Unknown filetype {filetype}')


@click.command()
@click.argument('config', type=click.File())
def main(config) -> None:
    """Load the configured tables, run the query and write the result.

    Args:
        config: Open JSON file (supplied by click) with the ``tables``,
            ``query`` and ``result`` entries.
    """
    settings = json.load(config)

    # Map each table name to its DataFrame; pandasql resolves the table names
    # used in the query against this mapping.
    tables = dict(
        read(table['path'], table['format'], table['name'])
        for table in settings['tables']
    )
    result = pandasql.sqldf(settings['query'], tables)

    target = settings['result']
    write(result, target['path'], target['format'], target['name'])


if __name__ == '__main__':
    main()