comparison build/bdist.linux-x86_64/egg/CHAP/reader.py @ 0:cbbe42422d56 draft

planemo upload for repository https://github.com/CHESSComputing/ChessAnalysisPipeline/tree/galaxy commit 1401a7e1ae007a6bda260d147f9b879e789b73e0-dirty
author kls286
date Tue, 28 Mar 2023 15:07:30 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:cbbe42422d56
1 #!/usr/bin/env python
2 """
3 File : reader.py
4 Author : Valentin Kuznetsov <vkuznet AT gmail dot com>
5 Description: generic Reader module
6 """
7
8 # system modules
9 import argparse
10 import json
11 import logging
12 import sys
13 from time import time
14
15 # local modules
16 # from pipeline import PipelineObject
17
class Reader():
    """
    Generic reader: base class whose `read` wraps the data returned by
    `_read` together with some descriptive metadata
    """

    def __init__(self):
        """
        Store the class name on the instance and create a logger of the
        same name that does not propagate to ancestor loggers
        """
        cls_name = type(self).__name__
        self.__name__ = cls_name
        self.logger = logging.getLogger(cls_name)
        self.logger.propagate = False

    def read(self, type_=None, schema=None, encoding=None, **_read_kwargs):
        '''Read API

        Call `self._read` and wrap whatever it returns in a one-item list.

        :param type_: the expected type of the data read, defaults to
            `None`; stored unchanged in the result
        :type type_: type, optional
        :param schema: the expected schema of the data read, defaults to
            `None`; stored unchanged in the result
        :type schema: str, optional
        :param encoding: stored unchanged in the result under the
            `'encoding'` key, defaults to `None`; it is not passed on to
            `self._read`
        :type encoding: object, optional
        :param _read_kwargs: keyword arguments forwarded to `self._read`,
            defaults to `{}`
        :type _read_kwargs: dict, optional
        :return: list with one item: a dictionary holding the name of this
            `Reader`, the data returned by `self._read`, and the values of
            `type_`, `schema` and `encoding`.
        :rtype: list[dict[str,object]]
        '''

        started = time()
        self.logger.info(f'Executing "read" with type={type_}, schema={schema}, kwargs={_read_kwargs}')

        # Build the record key by key; insertion order matches the order
        # in which the values are produced.
        record = {'name': self.__name__}
        record['data'] = self._read(**_read_kwargs)
        record['type'] = type_
        record['schema'] = schema
        record['encoding'] = encoding

        elapsed = time() - started
        self.logger.info(f'Finished "read" in {elapsed:.3f} seconds\n')
        return [record]

    def _read(self, filename):
        '''Read and return the full text content of `filename`

        :param filename: Name of file to read from; a falsy value (such as
            `''` or `None`) skips the read
        :return: the text read from the file, or `None` (after logging a
            warning) when no file name was given
        '''

        if filename:
            with open(filename) as handle:
                return handle.read()

        self.logger.warning('No file name is given, will skip read operation')
        return None
77
class MultipleReader(Reader):
    '''Reader that runs several other readers and pools their results.'''

    def read(self, readers):
        '''Return results from multiple `Reader`s.

        :param readers: a list of reader configurations. Each item is a
            dictionary whose first key is the name of a `Reader` class
            defined in this module and whose value for that key is the
            dictionary of keyword arguments for that reader's `read`.
        :type readers: list[dict]
        :return: The results of calling `Reader.read(**kwargs)` for each item
            configured in `readers`, concatenated in order.
        :rtype: list[dict[str,object]]
        '''

        started = time()
        self.logger.info(f'Executing "read" with {len(readers)} Readers')

        collected = []
        for config in readers:
            # Only the first key of each configuration is consulted.
            cls_name = list(config)[0]
            # Resolve the class by name among this module's attributes.
            reader = getattr(sys.modules[__name__], cls_name)()
            collected += reader.read(**config[cls_name])

        elapsed = time() - started
        self.logger.info(f'Finished "read" in {elapsed:.3f} seconds\n')

        return collected
106
class YAMLReader(Reader):
    '''Reader for YAML files.'''

    def _read(self, filename):
        '''Parse a YAML file and return its contents.

        :param filename: name of the YAML file to read from
        :return: the object produced by `yaml.safe_load` for the contents
            of `filename`
        :rtype: object
        '''

        # Imported here so that `yaml` is only needed when this reader is
        # actually used.
        import yaml

        with open(filename) as stream:
            return yaml.safe_load(stream)
121
class BinaryFileReader(Reader):
    '''Reader that returns the raw bytes of a file.'''

    def _read(self, filename):
        '''Return the full content of the given file, read in binary mode

        :param filename: name of the binary file to read from
        :return: the content of `filename`
        :rtype: bytes
        '''
        with open(filename, 'rb') as stream:
            return stream.read()
133
class NexusReader(Reader):
    '''Reader for NeXus files.'''

    def _read(self, filename, nxpath='/'):
        '''Load the NeXus file `filename` and return the object found at
        `nxpath` inside it.

        :param filename: name of the NeXus file to read from
        :type filename: str
        :param nxpath: path to a specific location in the NeXus file to read
            from, defaults to `'/'`
        :type nxpath: str, optional
        :raises nexusformat.nexus.NeXusError: if `filename` is not a NeXus
            file or `nxpath` is not in `filename`.
        :return: the NeXus structure indicated by `filename` and `nxpath`.
        :rtype: nexusformat.nexus.NXobject
        '''

        # Imported here so that `nexusformat` is only needed when this
        # reader is actually used.
        from nexusformat.nexus import nxload

        nxroot = nxload(filename)
        return nxroot[nxpath]
154
class URLReader(Reader):
    '''Reader that fetches data from a URL.'''

    def _read(self, url, headers=None, timeout=None):
        '''Make an HTTP(S) GET request to the provided URL and return the
        body of the response. Headers and a timeout are optional.

        :param url: the URL to read
        :type url: str
        :param headers: headers to attach to the request, defaults to
            `None`, which sends no extra headers
        :type headers: dict, optional
        :param timeout: passed to `requests.get` as its `timeout`
            argument, defaults to `None` (no timeout is set)
        :type timeout: float or tuple, optional
        :return: the content of the response (`Response.content`)
        :rtype: object
        '''

        # Imported here so that `requests` is only needed when this reader
        # is actually used.
        import requests

        # A fresh dict per call replaces the former shared mutable default
        # argument (`headers={}`).
        request_headers = {} if headers is None else headers

        resp = requests.get(url, headers=request_headers, timeout=timeout)
        data = resp.content

        self.logger.debug(f'Response content: {data}')

        return data
176
class OptionParser():
    '''Command-line option parser; the configured `argparse` parser is
    available as the `parser` attribute'''

    def __init__(self):
        parser = argparse.ArgumentParser(prog='PROG')

        # Input file path; empty string when not supplied.
        parser.add_argument(
            "--filename", dest="filename", action="store",
            default="", help="Input file")

        # Name of the reader class to use.
        parser.add_argument(
            "--reader", dest="reader", action="store",
            default="Reader", help="Reader class name")

        # Accepted values are the level names known to `logging`.
        parser.add_argument(
            '--log-level', dest='log_level',
            choices=logging._nameToLevel.keys(),
            default='INFO', help='logging level')

        self.parser = parser
187
def main():
    '''Main function

    Parse the command-line options, instantiate the requested reader
    class, attach a stream log handler at the requested level, read from
    the given file name and print the result.

    Exits with status 1 if the requested reader name is not a `Reader`
    subclass defined in this module.
    '''
    opts = OptionParser().parser.parse_args()
    cls_name = opts.reader

    # Look the name up among this module's attributes. Only `Reader`
    # subclasses are accepted: other module attributes (e.g. `sys`,
    # `OptionParser`, `main`) are not readers and would otherwise fail
    # later with an unrelated error.
    reader_cls = getattr(sys.modules[__name__], cls_name, None)
    if not (isinstance(reader_cls, type) and issubclass(reader_cls, Reader)):
        print(f'Unsupported reader {cls_name}')
        sys.exit(1)

    reader = reader_cls()
    reader.logger.setLevel(getattr(logging, opts.log_level))
    # The reader's logger does not propagate, so it needs its own handler
    # for messages to be shown.
    log_handler = logging.StreamHandler()
    log_handler.setFormatter(logging.Formatter('{name:20}: {message}', style='{'))
    reader.logger.addHandler(log_handler)
    data = reader.read(filename=opts.filename)

    print(f"Reader {reader} reads from {opts.filename}, data {data}")


if __name__ == '__main__':
    main()