Mercurial > repos > kls286 > chap_test_20230328
diff build/lib/CHAP/reader.py @ 0:cbbe42422d56 draft
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
| author | kls286 | 
|---|---|
| date | Tue, 28 Mar 2023 15:07:30 +0000 | 
| parents | |
| children | 
line wrap: on
 line diff
#!/usr/bin/env python
"""
File       : reader.py
Author     : Valentin Kuznetsov <vkuznet AT gmail dot com>
Description: generic Reader module
"""

# system modules
import argparse
import json
import logging
import sys
from time import time

# local modules
# from pipeline import PipelineObject


class Reader():
    """Generic file reader and base class for the specialised readers.

    Subclasses customise behaviour by overriding `_read`; the public
    `read` method wraps whatever `_read` returns in a metadata record.
    """

    def __init__(self):
        """Set the reader's name and create its dedicated logger."""
        self.__name__ = self.__class__.__name__
        self.logger = logging.getLogger(self.__name__)
        # Records are not forwarded to ancestor loggers; a handler has to
        # be attached to this logger explicitly (as `main` does).
        self.logger.propagate = False

    def read(self, type_=None, schema=None, encoding=None, **_read_kwargs):
        """Read API.

        Wrapper to read, format, and return the data requested.

        :param type_: the expected type of the data read, defaults to
            `None`
        :type type_: type, optional
        :param schema: the expected schema of the data read, defaults to
            `None`
        :type schema: str, optional
        :param encoding: stored unchanged in the returned record; it is
            not used when reading the data, defaults to `None`
        :type encoding: object, optional
        :param _read_kwargs: keyword arguments to pass to `self._read`,
            defaults to `{}`
        :type _read_kwargs: dict, optional
        :return: list with one item: a dictionary containing the data
            returned by `self._read`, the name of this `Reader`, and the
            values of `type_`, `schema` and `encoding`.
        :rtype: list[dict[str,object]]
        """
        t0 = time()
        self.logger.info(
            'Executing "read" with type=%s, schema=%s, kwargs=%s',
            type_, schema, _read_kwargs)

        data = [{'name': self.__name__,
                 'data': self._read(**_read_kwargs),
                 'type': type_,
                 'schema': schema,
                 'encoding': encoding}]

        self.logger.info('Finished "read" in %.3f seconds\n', time() - t0)
        return data

    def _read(self, filename):
        """Read and return the full text content of `filename`.

        :param filename: name of the file to read from
        :type filename: str
        :return: the content of the file, or `None` (after logging a
            warning) when `filename` is empty
        :rtype: str or None
        """
        if not filename:
            self.logger.warning(
                'No file name is given, will skip read operation')
            return None

        with open(filename) as file:
            return file.read()


class MultipleReader(Reader):
    """Reader that delegates to several other readers of this module."""

    def read(self, readers):
        """Return results from multiple `Reader`s.

        :param readers: a list of single-key dictionaries; each key is
            the name of a `Reader` class defined in this module and each
            value is the dictionary of keyword arguments passed to that
            reader's `read` method.
        :type readers: list[dict]
        :raises AttributeError: if a key does not name an attribute of
            this module
        :return: the concatenated results of calling
            `Reader.read(**kwargs)` for each item configured in
            `readers`.
        :rtype: list[dict[str,object]]
        """
        t0 = time()
        self.logger.info('Executing "read" with %s Readers', len(readers))

        module = sys.modules[__name__]
        data = []
        for reader_config in readers:
            # Only the first key of each configuration item is used.
            reader_name = list(reader_config)[0]
            reader = getattr(module, reader_name)()
            data.extend(reader.read(**reader_config[reader_name]))

        self.logger.info('Finished "read" in %.3f seconds\n', time() - t0)
        return data


class YAMLReader(Reader):
    """Reader for YAML files."""

    def _read(self, filename):
        """Return the parsed contents of a YAML file.

        :param filename: name of the YAML file to read from
        :type filename: str
        :return: the contents of `filename` as loaded by
            `yaml.safe_load`
        :rtype: dict
        """
        # Imported here so that PyYAML is only needed when this reader
        # is actually used.
        import yaml

        with open(filename) as file:
            return yaml.safe_load(file)


class BinaryFileReader(Reader):
    """Reader returning the raw bytes of a file."""

    def _read(self, filename):
        """Return the content of the given file.

        :param filename: name of the binary file to read from
        :type filename: str
        :return: the content of `filename`
        :rtype: bytes
        """
        with open(filename, 'rb') as file:
            return file.read()


class NexusReader(Reader):
    """Reader for NeXus files."""

    def _read(self, filename, nxpath='/'):
        """Return the NeXus object stored at `nxpath` in the NeXus file
        `filename`.

        :param filename: name of the NeXus file to read from
        :type filename: str
        :param nxpath: path to a specific location in the NeXus file to
            read from, defaults to `'/'`
        :type nxpath: str, optional
        :raises nexusformat.nexus.NeXusError: if `filename` is not a
            NeXus file or `nxpath` is not in `filename`.
        :return: the NeXus structure indicated by `filename` and
            `nxpath`.
        :rtype: nexusformat.nexus.NXobject
        """
        # Imported here so that nexusformat is only needed when this
        # reader is actually used.
        from nexusformat.nexus import nxload

        return nxload(filename)[nxpath]


class URLReader(Reader):
    """Reader fetching content over HTTP(S)."""

    def _read(self, url, headers=None):
        """Make a GET request to the provided URL and return the results.
        Headers for the request are optional.

        :param url: the URL to read
        :type url: str
        :param headers: headers to attach to the request, defaults to
            `None` (no extra headers)
        :type headers: dict, optional
        :return: the content of the response
        :rtype: bytes
        """
        # Imported here so that requests is only needed when this reader
        # is actually used.
        import requests

        # `None` replaces the former mutable `{}` default; an empty dict
        # is still what gets sent so the request itself is unchanged.
        resp = requests.get(url, headers={} if headers is None else headers)
        data = resp.content

        # Lazy formatting: the (possibly large) body is only rendered
        # when debug logging is enabled.
        self.logger.debug('Response content: %s', data)

        return data


class OptionParser():
    """Command-line option parser for this module.

    Defines `--filename`, `--reader` and `--log-level`.
    """

    def __init__(self):
        """Build the underlying `argparse.ArgumentParser`."""
        self.parser = argparse.ArgumentParser(prog='PROG')
        self.parser.add_argument(
            "--filename", action="store",
            dest="filename", default="", help="Input file")
        self.parser.add_argument(
            "--reader", action="store",
            dest="reader", default="Reader", help="Reader class name")
        # NOTE: relies on the private `logging._nameToLevel` mapping to
        # enumerate the accepted level names.
        self.parser.add_argument(
            '--log-level', choices=logging._nameToLevel.keys(),
            dest='log_level', default='INFO', help='logging level')


def main():
    """Command-line entry point.

    Parse the options, instantiate the requested reader, attach a stream
    handler to its logger, read `--filename` and print the result.
    Exits with status 1 when `--reader` does not name a `Reader`
    subclass defined in this module.
    """
    opts = OptionParser().parser.parse_args()
    reader_name = opts.reader

    # Accept only genuine Reader subclasses: a plain attribute lookup
    # would also match unrelated module attributes such as `json`.
    reader_cls = getattr(sys.modules[__name__], reader_name, None)
    if not (isinstance(reader_cls, type) and issubclass(reader_cls, Reader)):
        print(f'Unsupported reader {reader_name}')
        sys.exit(1)

    reader = reader_cls()
    reader.logger.setLevel(getattr(logging, opts.log_level))
    log_handler = logging.StreamHandler()
    log_handler.setFormatter(
        logging.Formatter('{name:20}: {message}', style='{'))
    reader.logger.addHandler(log_handler)
    data = reader.read(filename=opts.filename)

    print(f"Reader {reader} reads from {opts.filename}, data {data}")


if __name__ == '__main__':
    main()
