Mercurial > repos > recetox > query
comparison query.py @ 0:0369de831b32 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/query commit e781279d988f26eff9ccfff898a7d8cc4f0ceafb"
| author | recetox |
|---|---|
| date | Wed, 09 Dec 2020 02:04:20 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0369de831b32 |
|---|---|
| 1 import json | |
| 2 from typing import Tuple | |
| 3 | |
| 4 import click | |
| 5 import pandas | |
| 6 import pandasql | |
| 7 from pandas import DataFrame | |
| 8 | |
| 9 | |
def read(path: str, filetype: str, name: str) -> Tuple[str, DataFrame]:
    """Load one table from disk and pair it with its name.

    Args:
        path: Location of the file to load.
        filetype: Format selector. Accepted values are ``'csv'``,
            ``'tsv'``/``'tabular'``, ``'h5'``/``'hdf'``, ``'feather'``,
            ``'parquet'`` and ``'sqlite'``.
        name: Name returned alongside the data. For HDF files it is also
            the key to read, and for SQLite databases the table to read.

    Returns:
        A ``(name, dataframe)`` tuple for every supported format.

    Raises:
        NotImplementedError: If ``filetype`` is not one of the accepted
            values.
    """
    if filetype == 'csv':
        frame = pandas.read_csv(path)
    elif filetype in ('tsv', 'tabular'):
        frame = pandas.read_table(path)
    elif filetype in ('h5', 'hdf'):
        frame = pandas.read_hdf(path, name)
    elif filetype == 'feather':
        frame = pandas.read_feather(path)
    elif filetype == 'parquet':
        frame = pandas.read_parquet(path)
    elif filetype == 'sqlite':
        # The database is addressed through a connection URL built from path.
        frame = pandas.read_sql(name, f'sqlite:///{path}')
    else:
        raise NotImplementedError(f'Unknown filetype {filetype}')

    # Single exit point: every format yields the same (name, frame) pair, so
    # no branch can return a bare DataFrame by mistake.
    return name, frame
| 25 | |
| 26 | |
def write(df: DataFrame, path: str, filetype: str, name: str) -> None:
    """Store a DataFrame at ``path`` in the requested format.

    Args:
        df: Data to store.
        path: Destination file.
        filetype: Format selector. Accepted values are ``'csv'``,
            ``'tsv'``/``'tabular'``, ``'h5'``/``'hdf'``, ``'feather'``,
            ``'parquet'`` and ``'sqlite'``.
        name: Key used when appending to an HDF store, or table name used
            when writing to a SQLite database. Other formats ignore it.

    Raises:
        NotImplementedError: If ``filetype`` is not one of the accepted
            values.
    """
    if filetype in ('csv', 'tsv', 'tabular'):
        # Only the delimiter differs between the two text formats; all other
        # to_csv options are left at the pandas defaults.
        delimiter = ',' if filetype == 'csv' else '\t'
        df.to_csv(path, sep=delimiter)
        return

    if filetype in ('h5', 'hdf'):
        with pandas.HDFStore(path) as store:
            # Every column is registered as a data column of the stored table.
            store.append(name, df, data_columns=list(df.columns))
        return

    if filetype == 'feather':
        df.to_feather(path)
        return

    if filetype == 'parquet':
        df.to_parquet(path)
        return

    if filetype == 'sqlite':
        df.to_sql(name, f'sqlite:///{path}')
        return

    raise NotImplementedError(f'Unknown filetype {filetype}')
| 43 | |
| 44 | |
# Command-line entry point: CONFIG is a JSON file with three keys.
#   "tables": list of objects with "path", "format" and "name", each loaded
#             through read()
#   "query":  SQL text run by pandasql against the loaded tables
#   "result": object with "path", "format" and "name" describing where
#             write() stores the query output
# This is kept in comments rather than a docstring so the text click prints
# for --help stays unchanged.
@click.command()
@click.argument('config', type=click.File())
def main(config) -> None:
    settings = json.load(config)

    # Each read() call yields a (name, frame) pair; dict() turns the pairs
    # into the name -> DataFrame mapping that the query runs against.
    loaded = (
        read(entry['path'], entry['format'], entry['name'])
        for entry in settings['tables']
    )
    tables = dict(loaded)

    result = pandasql.sqldf(settings['query'], tables)

    # The output description is looked up only after the query has run.
    target = settings['result']
    write(result, target['path'], target['format'], target['name'])


if __name__ == '__main__':
    main()
