Mercurial > repos > kls286 > chap_test_20230328
comparison build/bdist.linux-x86_64/egg/CHAP/reader.py @ 0:cbbe42422d56 draft
planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
author | kls286 |
---|---|
date | Tue, 28 Mar 2023 15:07:30 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:cbbe42422d56 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 File : reader.py | |
4 Author : Valentin Kuznetsov <vkuznet AT gmail dot com> | |
5 Description: generic Reader module | |
6 """ | |
7 | |
8 # system modules | |
9 import argparse | |
10 import json | |
11 import logging | |
12 import sys | |
13 from time import time | |
14 | |
15 # local modules | |
16 # from pipeline import PipelineObject | |
17 | |
class Reader():
    """
    Reader represents a generic file reader.

    `read` is the public entry point: it calls `_read` and wraps the
    result in a one-item list of metadata dictionaries. Subclasses
    customise what is read by overriding `_read`.
    """

    def __init__(self):
        """
        Constructor of Reader class

        Stores the concrete class name in `self.__name__` and creates a
        logger with that name.
        """
        self.__name__ = self.__class__.__name__
        self.logger = logging.getLogger(self.__name__)
        # Records are not passed on to ancestor loggers, so a handler has
        # to be attached to `self.logger` itself.
        self.logger.propagate = False

    def read(self, type_=None, schema=None, encoding=None, **_read_kwargs):
        '''Read API

        Wrapper to read, format, and return the data requested.

        :param type_: the expected type of the data read, defaults to
            `None`
        :type type_: type, optional
        :param schema: the expected schema of the data read, defaults to
            `None`
        :type schema: str, optional
        :param encoding: value stored under the `'encoding'` key of the
            returned item; it is not used by this method when reading,
            defaults to `None`
        :type encoding: object, optional
        :param _read_kwargs: keyword arguments to pass to `self._read`,
            defaults to `{}`
        :type _read_kwargs: dict, optional
        :return: list with one item: a dictionary containing the data
            returned by `self._read`, the name of this `Reader`, and the
            values of `type_`, `schema` and `encoding`.
        :rtype: list[dict[str,object]]
        '''

        t0 = time()
        # Lazy %-style arguments: the message is only rendered when a
        # handler actually emits the record.
        self.logger.info(
            'Executing "read" with type=%s, schema=%s, kwargs=%s',
            type_, schema, _read_kwargs)

        data = [{'name': self.__name__,
                 'data': self._read(**_read_kwargs),
                 'type': type_,
                 'schema': schema,
                 'encoding': encoding}]

        self.logger.info('Finished "read" in %.3f seconds\n', time() - t0)
        return data

    def _read(self, filename):
        '''Read and return the text content of `filename`.

        :param filename: name of the file to read from
        :type filename: str
        :return: the entire content of the file, or `None` when
            `filename` is empty
        :rtype: str or None
        '''

        if not filename:
            self.logger.warning('No file name is given, will skip read operation')
            return None

        # Text mode with the platform default encoding, as before.
        with open(filename) as file:
            return file.read()
77 | |
class MultipleReader(Reader):
    """Reader that runs several configured `Reader`s and merges their
    results into one list."""

    def read(self, readers):
        '''Return results from multiple `Reader`s.

        :param readers: a list of `Reader` configurations. Each
            configuration is a dictionary whose first key is the name of
            a `Reader` class defined in this module and whose value is
            the dictionary of keyword arguments for that reader's `read`
            method. Only the first key of each dictionary is used.
        :type readers: list[dict]
        :return: The concatenated results of calling
            `Reader.read(**kwargs)` for each item configured in
            `readers`.
        :rtype: list[dict[str,object]]
        '''

        started = time()
        self.logger.info(f'Executing "read" with {len(readers)} Readers')

        results = []
        for config in readers:
            # The first key names the reader class to instantiate.
            class_name = list(config)[0]
            instance = getattr(sys.modules[__name__], class_name)()
            results.extend(instance.read(**config[class_name]))

        self.logger.info(f'Finished "read" in {time()-started:.3f} seconds\n')

        return results
106 | |
class YAMLReader(Reader):
    """Reader for YAML files."""

    def _read(self, filename):
        '''Return the parsed contents of a YAML file.

        :param filename: name of the YAML file to read from
        :type filename: str
        :return: the contents of `filename` as parsed by
            `yaml.safe_load` (a dictionary when the top-level YAML node
            is a mapping)
        :rtype: dict
        '''

        import yaml

        # Open in binary mode so PyYAML determines the stream encoding
        # itself instead of relying on the platform's locale default
        # text encoding.
        with open(filename, 'rb') as file:
            return yaml.safe_load(file)
121 | |
class BinaryFileReader(Reader):
    """Reader returning the raw bytes of a file."""

    def _read(self, filename):
        '''Return the content of the given file, read in binary mode.

        :param filename: name of the binary file to read from
        :return: the content of `filename`
        :rtype: bytes
        '''
        with open(filename, 'rb') as stream:
            return stream.read()
133 | |
class NexusReader(Reader):
    """Reader for NeXus files."""

    def _read(self, filename, nxpath='/'):
        '''Load the NeXus file `filename` and return the object found at
        `nxpath` inside it.

        :param filename: name of the NeXus file to read from
        :type filename: str
        :param nxpath: path to a specific location in the NeXus file to
            read from, defaults to `'/'`
        :type nxpath: str, optional
        :raises nexusformat.nexus.NeXusError: if `filename` is not a NeXus
            file or `nxpath` is not in `filename`.
        :return: the NeXus structure indicated by `filename` and `nxpath`.
        :rtype: nexusformat.nexus.NXobject
        '''

        from nexusformat.nexus import nxload

        nxroot = nxload(filename)
        return nxroot[nxpath]
154 | |
class URLReader(Reader):
    """Reader that fetches data from a URL with an HTTP GET request."""

    def _read(self, url, headers=None, timeout=None):
        '''Make a GET request to the provided URL and return the content
        of the response. Headers for the request are optional.

        :param url: the URL to read
        :type url: str
        :param headers: headers to attach to the request, defaults to
            `None` (no extra headers)
        :type headers: dict, optional
        :param timeout: timeout in seconds passed to `requests.get`,
            defaults to `None` (no timeout, matching the previous
            behaviour)
        :type timeout: float or tuple, optional
        :return: the content of the response
        :rtype: bytes
        '''

        import requests

        # A fresh dict per call; a `{}` default in the signature would be
        # one object shared by every call.
        if headers is None:
            headers = {}

        resp = requests.get(url, headers=headers, timeout=timeout)
        data = resp.content

        # Lazy formatting: the (possibly large) payload is only rendered
        # when DEBUG logging is enabled.
        self.logger.debug('Response content: %s', data)

        return data
176 | |
class OptionParser():
    '''User based option parser

    Wraps an `argparse.ArgumentParser` (available as `self.parser`) that
    accepts `--filename`, `--reader` and `--log-level`.
    '''
    def __init__(self):
        # NOTE(review): 'PROG' looks like the argparse documentation
        # placeholder; it is kept so usage output does not change.
        self.parser = argparse.ArgumentParser(prog='PROG')
        self.parser.add_argument(
            '--filename', action='store',
            dest='filename', default='', help='Input file')
        self.parser.add_argument(
            '--reader', action='store',
            dest='reader', default='Reader', help='Reader class name')
        self.parser.add_argument(
            '--log-level', choices=self._log_level_names(),
            dest='log_level', default='INFO', help='logging level')

    @staticmethod
    def _log_level_names():
        '''Return the names of the logging levels known to `logging`.

        Uses the public `logging.getLevelNamesMapping()` when the running
        Python provides it and only falls back to the private
        `logging._nameToLevel` mapping otherwise.

        :return: the level names accepted by `--log-level`
        :rtype: list[str]
        '''
        get_mapping = getattr(logging, 'getLevelNamesMapping', None)
        if get_mapping is not None:
            return list(get_mapping())
        return list(logging._nameToLevel)
187 | |
def main():
    '''Main function

    Parses the command line, instantiates the requested reader class
    from this module, attaches a stream log handler to its logger, reads
    from the given file name and prints the result. Prints a message and
    exits with status 1 if the requested name is not a `Reader` class
    defined in this module.
    '''
    optmgr = OptionParser()
    opts = optmgr.parser.parse_args()
    reader_name = opts.reader

    # Look the class up explicitly instead of using a bare `except:`, and
    # reject module attributes that are not Reader classes (imported
    # modules, functions, ...).
    reader_cls = getattr(sys.modules[__name__], reader_name, None)
    if not (isinstance(reader_cls, type) and issubclass(reader_cls, Reader)):
        print(f'Unsupported reader {reader_name}')
        sys.exit(1)

    reader = reader_cls()
    reader.logger.setLevel(getattr(logging, opts.log_level))
    # The reader's logger does not propagate, so it needs its own handler
    # for messages to be shown.
    log_handler = logging.StreamHandler()
    log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{'))
    reader.logger.addHandler(log_handler)
    data = reader.read(filename=opts.filename)

    print(f"Reader {reader} reads from {opts.filename}, data {data}")
207 | |
# Run the command-line interface only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()