Mercurial > repos > kls286 > chap_test_20230328
comparison CHAP/reader.py @ 0:cbbe42422d56 draft
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
| author | kls286 |
|---|---|
| date | Tue, 28 Mar 2023 15:07:30 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:cbbe42422d56 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 File : reader.py | |
| 4 Author : Valentin Kuznetsov <vkuznet AT gmail dot com> | |
| 5 Description: generic Reader module | |
| 6 """ | |
| 7 | |
| 8 # system modules | |
| 9 import argparse | |
| 10 import json | |
| 11 import logging | |
| 12 import sys | |
| 13 from time import time | |
| 14 | |
| 15 # local modules | |
| 16 # from pipeline import PipelineObject | |
| 17 | |
| 18 class Reader(): | |
| 19 """ | |
| 20 Reader represents a generic file reader | |
| 21 """ | |
| 22 | |
| 23 def __init__(self): | |
| 24 """ | |
| 25 Constructor of Reader class | |
| 26 """ | |
| 27 self.__name__ = self.__class__.__name__ | |
| 28 self.logger = logging.getLogger(self.__name__) | |
| 29 self.logger.propagate = False | |
| 30 | |
| 31 def read(self, type_=None, schema=None, encoding=None, **_read_kwargs): | |
| 32 '''Read API | |
| 33 | |
| 34 Wrapper to read, format, and return the data requested. | |
| 35 | |
| 36 :param type_: the expected type of data read from `filename`, defaults | |
| 37 to `None` | |
| 38 :type type_: type, optional | |
| 39 :param schema: the expected schema of the data read from `filename`, | |
| 40 defaults to `None` | |
| 41 :type schema: str, optional | |
| 42 :param _read_kwargs: keyword arguments to pass to `self._read`, defaults | |
| 43 to `{}` | |
| 44 :type _read_kwargs: dict, optional | |
| 45 :return: list with one item: a dictionary containing the data read from | |
| 46 `filename`, the name of this `Reader`, and the values of `type_` and | |
| 47 `schema`. | |
| 48 :rtype: list[dict[str,object]] | |
| 49 ''' | |
| 50 | |
| 51 t0 = time() | |
| 52 self.logger.info(f'Executing "read" with type={type_}, schema={schema}, kwargs={_read_kwargs}') | |
| 53 | |
| 54 data = [{'name': self.__name__, | |
| 55 'data': self._read(**_read_kwargs), | |
| 56 'type': type_, | |
| 57 'schema': schema, | |
| 58 'encoding': encoding}] | |
| 59 | |
| 60 self.logger.info(f'Finished "read" in {time()-t0:.3f} seconds\n') | |
| 61 return(data) | |
| 62 | |
| 63 def _read(self, filename): | |
| 64 '''Read and return the data requested from `filename` | |
| 65 | |
| 66 :param filename: Name of file to read from | |
| 67 :return: the text contents of `filename`, or `None` if no file name is given | |
| 68 ''' | |
| 69 | |
| 70 if not filename: | |
| 71 self.logger.warning('No file name is given, will skip read operation') | |
| 72 return None | |
| 73 | |
| 74 with open(filename) as file: | |
| 75 data = file.read() | |
| 76 return(data) | |
| 77 | |
| 78 class MultipleReader(Reader): | |
| 79 def read(self, readers): | |
| 80 '''Return results from multiple `Reader`s. | |
| 81 | |
| 82 :param readers: a list of dictionaries; the first key of each names a | |
| 83 `Reader` class in this module, and its value holds the keyword arguments | |
| 84 passed to that reader's `read` method. | |
| 85 :type readers: list[dict] | |
| 86 :return: The results of calling `Reader.read(**kwargs)` for each item | |
| 87 configured in `readers`. | |
| 88 :rtype: list[dict[str,object]] | |
| 89 ''' | |
| 90 | |
| 91 t0 = time() | |
| 92 self.logger.info(f'Executing "read" with {len(readers)} Readers') | |
| 93 | |
| 94 data = [] | |
| 95 for reader_config in readers: | |
| 96 reader_name = list(reader_config.keys())[0] | |
| 97 reader_class = getattr(sys.modules[__name__], reader_name) | |
| 98 reader = reader_class() | |
| 99 reader_kwargs = reader_config[reader_name] | |
| 100 | |
| 101 data.extend(reader.read(**reader_kwargs)) | |
| 102 | |
| 103 self.logger.info(f'Finished "read" in {time()-t0:.3f} seconds\n') | |
| 104 | |
| 105 return(data) | |
| 106 | |
| 107 class YAMLReader(Reader): | |
| 108 def _read(self, filename): | |
| 109 '''Return a dictionary from the contents of a yaml file. | |
| 110 | |
| 111 :param filename: name of the YAML file to read from | |
| 112 :return: the contents of `filename` | |
| 113 :rtype: dict | |
| 114 ''' | |
| 115 | |
| 116 import yaml | |
| 117 | |
| 118 with open(filename) as file: | |
| 119 data = yaml.safe_load(file) | |
| 120 return(data) | |
| 121 | |
| 122 class BinaryFileReader(Reader): | |
| 123 def _read(self, filename): | |
| 124 '''Return the content of the given file | |
| 125 | |
| 126 :param filename: name of the binary file to read from | |
| 127 :return: the content of `filename` | |
| 128 :rtype: bytes | |
| 129 ''' | |
| 130 with open(filename, 'rb') as file: | |
| 131 data = file.read() | |
| 132 return(data) | |
| 133 | |
| 134 class NexusReader(Reader): | |
| 135 def _read(self, filename, nxpath='/'): | |
| 136 '''Return the NeXus object stored at `nxpath` in the nexus file | |
| 137 `filename`. | |
| 138 | |
| 139 :param filename: name of the NeXus file to read from | |
| 140 :type filename: str | |
| 141 :param nxpath: path to a specific location in the NeXus file to read | |
| 142 from, defaults to `'/'` | |
| 143 :type nxpath: str, optional | |
| 144 :raises nexusformat.nexus.NeXusError: if `filename` is not a NeXus | |
| 145 file or `nxpath` is not in `filename`. | |
| 146 :return: the NeXus structure indicated by `filename` and `nxpath`. | |
| 147 :rtype: nexusformat.nexus.NXobject | |
| 148 ''' | |
| 149 | |
| 150 from nexusformat.nexus import nxload | |
| 151 | |
| 152 nxobject = nxload(filename)[nxpath] | |
| 153 return(nxobject) | |
| 154 | |
| 155 class URLReader(Reader): | |
| 156 def _read(self, url, headers={}): | |
| 157 '''Make an HTTPS request to the provided URL and return the results. | |
| 158 Headers for the request are optional. | |
| 159 | |
| 160 :param url: the URL to read | |
| 161 :type url: str | |
| 162 :param headers: headers to attach to the request, defaults to `{}` | |
| 163 :type headers: dict, optional | |
| 164 :return: the content of the response | |
| 165 :rtype: object | |
| 166 ''' | |
| 167 | |
| 168 import requests | |
| 169 | |
| 170 resp = requests.get(url, headers=headers) | |
| 171 data = resp.content | |
| 172 | |
| 173 self.logger.debug(f'Response content: {data}') | |
| 174 | |
| 175 return(data) | |
| 176 | |
| 177 class OptionParser(): | |
| 178 '''User based option parser''' | |
| 179 def __init__(self): | |
| 180 self.parser = argparse.ArgumentParser(prog='PROG') | |
| 181 self.parser.add_argument("--filename", action="store", | |
| 182 dest="filename", default="", help="Input file") | |
| 183 self.parser.add_argument("--reader", action="store", | |
| 184 dest="reader", default="Reader", help="Reader class name") | |
| 185 self.parser.add_argument('--log-level', choices=logging._nameToLevel.keys(), | |
| 186 dest='log_level', default='INFO', help='logging level') | |
| 187 | |
| 188 def main(): | |
| 189 '''Main function''' | |
| 190 optmgr = OptionParser() | |
| 191 opts = optmgr.parser.parse_args() | |
| 192 clsName = opts.reader | |
| 193 try: | |
| 194 readerCls = getattr(sys.modules[__name__],clsName) | |
| 195 except: | |
| 196 print(f'Unsupported reader {clsName}') | |
| 197 sys.exit(1) | |
| 198 | |
| 199 reader = readerCls() | |
| 200 reader.logger.setLevel(getattr(logging, opts.log_level)) | |
| 201 log_handler = logging.StreamHandler() | |
| 202 log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{')) | |
| 203 reader.logger.addHandler(log_handler) | |
| 204 data = reader.read(filename=opts.filename) | |
| 205 | |
| 206 print(f"Reader {reader} reads from {opts.filename}, data {data}") | |
| 207 | |
| 208 if __name__ == '__main__': | |
| 209 main() |
