view build/bdist.linux-x86_64/egg/CHAP/reader.py @ 0:cbbe42422d56 draft

planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
author kls286
date Tue, 28 Mar 2023 15:07:30 +0000
parents
children
line wrap: on
line source

#!/usr/bin/env python
"""
File       : reader.py
Author     : Valentin Kuznetsov <vkuznet AT gmail dot com>
Description: generic Reader module
"""

# system modules
import argparse
import json
import logging
import sys
from time import time

# local modules
# from pipeline import PipelineObject

class Reader():
    """
    Generic reader: `read` wraps whatever `_read` returns in a one-item
    list of metadata, and the base `_read` returns the text of a file.
    Subclasses override `_read` to support other sources.
    """

    def __init__(self):
        """
        Record the class name and create a non-propagating logger for it
        """
        cls_name = type(self).__name__
        self.__name__ = cls_name

        # One logger per reader class name; records are not passed on to
        # ancestor loggers.
        logger = logging.getLogger(cls_name)
        logger.propagate = False
        self.logger = logger

    def read(self, type_=None, schema=None, encoding=None, **_read_kwargs):
        '''Read API

        Call `self._read(**_read_kwargs)` and return its result together
        with the name of this reader and the given metadata values.

        :param type_: the expected type of the data that is read, stored
            as-is under the `'type'` key, defaults to `None`
        :type type_: type, optional
        :param schema: the expected schema of the data that is read, stored
            as-is under the `'schema'` key, defaults to `None`
        :type schema: str, optional
        :param encoding: stored as-is under the `'encoding'` key; it is not
            passed to `self._read`, defaults to `None`
        :type encoding: object, optional
        :param _read_kwargs: keyword arguments to pass to `self._read`,
            defaults to `{}`
        :type _read_kwargs: dict, optional
        :return: list with one item: a dictionary with the keys `'name'`,
            `'data'`, `'type'`, `'schema'` and `'encoding'`.
        :rtype: list[dict[str,object]]
        '''

        t0 = time()
        self.logger.info(f'Executing "read" with type={type_}, schema={schema}, kwargs={_read_kwargs}')

        # Built key by key so the insertion order matches what callers
        # see when the item is printed.
        item = {'name': self.__name__}
        item['data'] = self._read(**_read_kwargs)
        item['type'] = type_
        item['schema'] = schema
        item['encoding'] = encoding

        self.logger.info(f'Finished "read" in {time()-t0:.3f} seconds\n')
        return [item]

    def _read(self, filename):
        '''Return the entire content of the text file `filename`.

        :param filename: name of the file to read from; a falsy value
            (e.g. `''` or `None`) skips the read
        :return: the content of the file, or `None` if no file name was
            given
        '''

        if filename:
            with open(filename) as stream:
                return stream.read()

        self.logger.warning('No file name is given, will skip read operation')
        return None

class MultipleReader(Reader):
    def read(self, readers):
        '''Run several `Reader`s in order and concatenate their results.

        :param readers: a list of reader configurations. Each configuration
            is a dictionary whose first key is the name of a reader class
            defined in this module and whose value holds the keyword
            arguments for that class's `read` method.
        :type readers: list[dict]
        :return: the items returned by each configured reader's `read`
            call, in the order the readers are listed.
        :rtype: list[dict[str,object]]
        '''

        t0 = time()
        self.logger.info(f'Executing "read" with {len(readers)} Readers')

        results = []
        for config in readers:
            # Only the first key of a configuration is looked at.
            names = list(config.keys())
            class_name = names[0]

            # Resolve the class by name in this module and instantiate it.
            instance = getattr(sys.modules[__name__], class_name)()

            results += instance.read(**config[class_name])

        self.logger.info(f'Finished "read" in {time()-t0:.3f} seconds\n')

        return results

class YAMLReader(Reader):
    def _read(self, filename):
        '''Parse the YAML file `filename` with `yaml.safe_load`.

        :param filename: name of the YAML file to read from
        :return: the parsed contents of `filename` (a dictionary when the
            document is a mapping)
        :rtype: dict
        '''

        # Imported here so that PyYAML is only loaded when this reader
        # is actually used.
        from yaml import safe_load

        with open(filename) as stream:
            return safe_load(stream)

class BinaryFileReader(Reader):
    def _read(self, filename):
        '''Return the raw bytes stored in `filename`.

        :param filename: name of the binary file to read from
        :return: the entire content of `filename`
        :rtype: bytes
        '''
        with open(filename, mode='rb') as stream:
            return stream.read()

class NexusReader(Reader):
    def _read(self, filename, nxpath='/'):
        '''Load the NeXus file `filename` and return the object found at
        `nxpath` inside it.

        :param filename: name of the NeXus file to read from
        :type filename: str
        :param nxpath: path to a specific location in the NeXus file to
            read from, defaults to `'/'`
        :type nxpath: str, optional
        :raises nexusformat.nexus.NeXusError: if `filename` is not a NeXus
            file or `nxpath` is not in `filename`.
        :return: the NeXus structure indicated by `filename` and `nxpath`.
        :rtype: nexusformat.nexus.NXobject
        '''

        # Imported here so that nexusformat is only loaded when this
        # reader is actually used.
        from nexusformat.nexus import nxload

        tree = nxload(filename)
        return tree[nxpath]

class URLReader(Reader):
    def _read(self, url, headers=None, timeout=None):
        '''Make an HTTP GET request to the provided URL and return the
        body of the response. Headers and a timeout are optional.

        The body is returned whatever the response status is; a warning is
        logged when the status indicates an error.

        :param url: the URL to read
        :type url: str
        :param headers: headers to attach to the request, defaults to
            `None` (no additional headers)
        :type headers: dict, optional
        :param timeout: seconds to wait for the server, passed on to
            `requests.get`; defaults to `None` (wait indefinitely)
        :type timeout: float or tuple, optional
        :return: the content of the response
        :rtype: bytes
        '''

        import requests

        # Create the dict per call: a `{}` default in the signature would
        # be one object shared by every invocation.
        if headers is None:
            headers = {}

        resp = requests.get(url, headers=headers, timeout=timeout)
        data = resp.content

        if not resp.ok:
            # The body is still returned, but make the failure visible.
            self.logger.warning('Request to %s returned status %s',
                                url, resp.status_code)

        # Lazy %-formatting: a potentially large body is only rendered
        # when debug logging is enabled.
        self.logger.debug('Response content: %s', data)

        return data

class OptionParser():
    '''Command-line options for running a reader as a script.

    The configured `argparse.ArgumentParser` is available as `self.parser`
    and defines `--filename`, `--reader` and `--log-level`.
    '''
    def __init__(self):
        parser = argparse.ArgumentParser(prog='PROG')

        # File to read; an empty string means no file was given.
        parser.add_argument(
            '--filename', dest='filename', default='', help='Input file')

        # Name of the reader class to use.
        parser.add_argument(
            '--reader', dest='reader', default='Reader',
            help='Reader class name')

        # Restricted to the level names known to the logging module.
        parser.add_argument(
            '--log-level', dest='log_level', default='INFO',
            choices=logging._nameToLevel.keys(), help='logging level')

        self.parser = parser

def main():
    '''Main function

    Parse the command-line options, instantiate the reader class named by
    `--reader` from this module, attach a stream handler to its logger at
    the level given by `--log-level`, read `--filename` and print the
    result.

    Exits with status 1 if `--reader` does not name a `Reader` subclass
    defined in this module.
    '''
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    cls_name = opts.reader

    # Accept only actual Reader classes: a bare attribute lookup would
    # also return imported modules or functions of this module, which
    # would then fail later with an unrelated error.
    reader_cls = getattr(sys.modules[__name__], cls_name, None)
    if not (isinstance(reader_cls, type) and issubclass(reader_cls, Reader)):
        print(f'Unsupported reader {cls_name}')
        sys.exit(1)

    reader = reader_cls()
    reader.logger.setLevel(getattr(logging, opts.log_level))

    # The reader's logger does not propagate, so it needs its own handler
    # for messages to be shown.
    log_handler = logging.StreamHandler()
    log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{'))
    reader.logger.addHandler(log_handler)

    data = reader.read(filename=opts.filename)

    print(f"Reader {reader} reads from {opts.filename}, data {data}")

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()