Mercurial > repos > bcclaywell > argo_navis
comparison venv/lib/python2.7/site-packages/docutils/io.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author | bcclaywell |
---|---|
date | Mon, 12 Oct 2015 17:43:33 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d67268158946 |
---|---|
1 # $Id: io.py 7596 2013-01-25 13:42:17Z milde $ | |
2 # Author: David Goodger <goodger@python.org> | |
3 # Copyright: This module has been placed in the public domain. | |
4 | |
5 """ | |
6 I/O classes provide a uniform API for low-level input and output. Subclasses | |
7 exist for a variety of input/output mechanisms. | |
8 """ | |
9 | |
10 __docformat__ = 'reStructuredText' | |
11 | |
12 import sys | |
13 import os | |
14 import re | |
15 import codecs | |
16 from docutils import TransformSpec | |
17 from docutils._compat import b | |
18 from docutils.utils.error_reporting import locale_encoding, ErrorString, ErrorOutput | |
19 | |
20 | |
class InputError(IOError):
    """I/O error concerning an input source (e.g. the source cannot be opened)."""


class OutputError(IOError):
    """I/O error concerning an output destination (e.g. it cannot be opened)."""
23 | |
def check_encoding(stream, encoding):
    """Test whether the encoding of `stream` matches `encoding`.

    Both names are resolved with ``codecs.lookup`` so that aliases of the
    same codec compare equal.

    Returns

    :None:  if `encoding` or `stream.encoding` are not a valid encoding
            argument (e.g. ``None``) or `stream.encoding` is missing.
    :True:  if both names resolve to the same codec,
    :False: if the encodings differ.
    """
    try:
        stream_codec = codecs.lookup(stream.encoding)
        wanted_codec = codecs.lookup(encoding)
    except (LookupError, AttributeError, TypeError):
        # unknown codec name, no `encoding` attribute, or a non-string name
        return None
    return stream_codec == wanted_codec
38 | |
39 | |
class Input(TransformSpec):

    """
    Abstract base class for input wrappers.

    Subclasses implement `read()`; `decode()` turns the raw data into a
    Unicode string.
    """

    component_type = 'input'

    default_source_path = None

    def __init__(self, source=None, source_path=None, encoding=None,
                 error_handler='strict'):
        self.encoding = encoding
        """Text encoding for the input source."""

        self.error_handler = error_handler
        """Text decoding error handler."""

        self.source = source
        """The source of input data."""

        # Fall back to the class-level default when no path is given.
        self.source_path = source_path or self.default_source_path
        """A text reference to the source."""

        self.successful_encoding = None
        """The encoding that successfully decoded the source data."""

    def __repr__(self):
        details = (self.__class__, self.source, self.source_path)
        return '%s: source=%r, source_path=%r' % details

    def read(self):
        """Return the input data; must be provided by subclasses."""
        raise NotImplementedError

    def decode(self, data):
        """
        Decode a string, `data`, heuristically.
        Raise UnicodeError if unsuccessful.

        Unicode input is returned unchanged. Otherwise the encodings tried
        are, in order of preference: the explicitly given `self.encoding`,
        an encoding found in the data itself, or the fallbacks UTF-8,
        the locale encoding (if known) and Latin-1.

        The client application should call ``locale.setlocale`` at the
        beginning of processing::

            locale.setlocale(locale.LC_ALL, '')
        """
        declared = self.encoding
        if declared and declared.lower() == 'unicode':
            assert isinstance(data, unicode), (
                'input encoding is "unicode" '
                'but input is not a unicode object')
        if isinstance(data, unicode):
            # Accept unicode even if self.encoding != 'unicode'.
            return data
        if declared:
            # An explicitly given encoding is trusted.
            candidates = [declared]
        else:
            sniffed = self.determine_encoding_from_data(data)
            if sniffed:
                # The data declares its encoding (explicitly or via a BOM).
                candidates = [sniffed]
            elif locale_encoding:
                # Heuristics: UTF-8 first, because that only matches
                # data that *IS* UTF-8.
                candidates = ['utf-8', locale_encoding, 'latin-1']
            else:
                candidates = ['utf-8', 'latin-1']
        for candidate in candidates:
            try:
                text = unicode(data, candidate, self.error_handler)
            except (UnicodeError, LookupError) as err:
                # keep a reference: in Python 3 the exception name is
                # local to the except clause
                failure = err
            else:
                self.successful_encoding = candidate
                # Return the decoded text, removing BOMs.
                return text.replace(u'\ufeff', u'')
        raise UnicodeError(
            'Unable to decode input data. Tried the following encodings: '
            '%s.\n(%s)' % (', '.join(map(repr, candidates)),
                         ErrorString(failure)))

    coding_slug = re.compile(b(r"coding[:=]\s*([-\w.]+)"))
    """Encoding declaration pattern."""

    byte_order_marks = (
        (codecs.BOM_UTF8, 'utf-8'),  # 'utf-8-sig' new in v2.5
        (codecs.BOM_UTF16_BE, 'utf-16-be'),
        (codecs.BOM_UTF16_LE, 'utf-16-le'),
    )
    """Sequence of (start_bytes, encoding) tuples for encoding detection.
    The first bytes of input data are checked against the start_bytes strings.
    A match indicates the given encoding."""

    def determine_encoding_from_data(self, data):
        """
        Try to determine the encoding of `data` by looking *in* `data`.

        Check for a byte order mark (BOM) first, then for an encoding
        declaration in the first two lines. Return the encoding name, or
        None if neither is found.
        """
        # byte order mark at the very start of the data?
        for bom, name in self.byte_order_marks:
            if data.startswith(bom):
                return name
        # encoding declaration pattern in the first 2 lines?
        for line in data.splitlines()[:2]:
            found = self.coding_slug.search(line)
            if found:
                return found.group(1).decode('ascii')
        return None
150 | |
151 | |
class Output(TransformSpec):

    """
    Abstract base class for output wrappers.

    Subclasses implement `write()`; `encode()` converts Unicode output to
    the configured encoding.
    """

    component_type = 'output'

    default_destination_path = None

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict'):
        self.encoding = encoding
        """Text encoding for the output destination."""

        self.error_handler = error_handler or 'strict'
        """Text encoding error handler."""

        self.destination = destination
        """The destination for output data."""

        # Fall back to the class-level default when no path is given.
        self.destination_path = (destination_path
                                 or self.default_destination_path)
        """A text reference to the destination."""

    def __repr__(self):
        details = (self.__class__, self.destination, self.destination_path)
        return '%s: destination=%r, destination_path=%r' % details

    def write(self, data):
        """`data` is a Unicode string, to be encoded by `self.encode`."""
        raise NotImplementedError

    def encode(self, data):
        """
        Return `data` encoded with `self.encoding`.

        With the pseudo-encoding "unicode", `data` must be a Unicode string
        and is returned as is. Non-Unicode data (e.g. bytes) is passed
        through unchanged.
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data, unicode), (
                'the encoding given is "unicode" but the output is not '
                'a Unicode string')
            return data
        if isinstance(data, unicode):
            return data.encode(self.encoding, self.error_handler)
        # Non-unicode (e.g. bytes) output.
        return data
198 | |
199 | |
class FileInput(Input):

    """
    Input for single, simple file-like objects.
    """
    def __init__(self, source=None, source_path=None,
                 encoding=None, error_handler='strict',
                 autoclose=True, handle_io_errors=None, mode='rU'):
        """
        :Parameters:
            - `source`: either a file-like object (which is read directly), or
              `None` (which implies `sys.stdin` if no `source_path` given).
            - `source_path`: a path to a file, which is opened and then read.
            - `encoding`: the expected text encoding of the input file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after read (except when
              `sys.stdin` is the source).
            - `handle_io_errors`: ignored, deprecated, will be removed.
            - `mode`: how the file is to be opened (see standard function
              `open`). The default 'rU' provides universal newline support
              for text files.

        Raises `InputError` if `source_path` cannot be opened and, under
        Python 3, `UnicodeError` if an already open `source` uses an
        encoding that differs from `encoding`.
        """
        Input.__init__(self, source, source_path, encoding, error_handler)
        self.autoclose = autoclose
        self._stderr = ErrorOutput()

        if source is None:
            if source_path:
                # Specify encoding in Python 3
                if sys.version_info >= (3, 0):
                    kwargs = {'encoding': self.encoding,
                              'errors': self.error_handler}
                else:
                    kwargs = {}

                try:
                    self.source = open(source_path, mode, **kwargs)
                except IOError as error:
                    raise InputError(error.errno, error.strerror, source_path)
            else:
                self.source = sys.stdin
        elif (sys.version_info >= (3, 0) and
              check_encoding(self.source, self.encoding) is False):
            # TODO: re-open, warn or raise error?
            raise UnicodeError('Encoding clash: encoding given is "%s" '
                               'but source is opened with encoding "%s".' %
                               (self.encoding, self.source.encoding))
        if not source_path:
            # Use the name of the file object (if it has one) as reference.
            try:
                self.source_path = self.source.name
            except AttributeError:
                pass

    def read(self):
        """
        Read and decode a single file and return the data (Unicode string).

        If reading fails with a decoding error, no encoding was specified
        and a source path is known, the file is re-read in binary mode and
        decoded heuristically by `decode()`.
        """
        try:
            if self.source is sys.stdin and sys.version_info >= (3, 0):
                # read as binary data to circumvent auto-decoding
                data = self.source.buffer.read()
                # normalize newlines
                data = b('\n').join(data.splitlines()) + b('\n')
            else:
                data = self.source.read()
        except (UnicodeError, LookupError):  # (in Py3k read() decodes)
            if not self.encoding and self.source_path:
                # re-read in binary mode and decode with heuristics
                b_source = open(self.source_path, 'rb')
                try:
                    data = b_source.read()
                finally:
                    # close the temporary handle even if reading fails
                    b_source.close()
                # normalize newlines
                data = b('\n').join(data.splitlines()) + b('\n')
            else:
                raise
        finally:
            if self.autoclose:
                self.close()
        return self.decode(data)

    def readlines(self):
        """
        Return lines of a single file as list of Unicode strings.
        """
        return self.read().splitlines(True)

    def close(self):
        """Close the source, unless it is `sys.stdin`."""
        if self.source is not sys.stdin:
            self.source.close()
290 | |
291 | |
class FileOutput(Output):

    """
    Output for single, simple file-like objects.
    """

    mode = 'w'
    """The mode argument for `open()`."""
    # 'wb' for binary (e.g. OpenOffice) files (see also `BinaryFileOutput`).
    # (Do not use binary mode ('wb') for text files, as this prevents the
    # conversion of newlines to the system specific default.)

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict', autoclose=True,
                 handle_io_errors=None, mode=None):
        """
        :Parameters:
            - `destination`: either a file-like object (which is written
              directly) or `None` (which implies `sys.stdout` if no
              `destination_path` given).
            - `destination_path`: a path to a file, which is opened and then
              written.
            - `encoding`: the text encoding of the output file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after write (except when
              `sys.stdout` or `sys.stderr` is the destination).
            - `handle_io_errors`: ignored, deprecated, will be removed.
            - `mode`: how the file is to be opened (see standard function
              `open`). The default is 'w', providing universal newline
              support for text files.
        """
        Output.__init__(self, destination, destination_path,
                        encoding, error_handler)
        self.opened = True
        self.autoclose = autoclose
        if mode is not None:
            self.mode = mode
        self._stderr = ErrorOutput()
        if destination is None:
            if destination_path:
                # opening is deferred until the first `write()`
                self.opened = False
            else:
                self.destination = sys.stdout
        elif (mode and hasattr(self.destination, 'mode')
              and mode != self.destination.mode):
            # destination is a file-type object opened with another mode
            print >>self._stderr, ('Warning: Destination mode "%s" '
                                   'differs from specified mode "%s"' %
                                   (self.destination.mode, mode))
        if not destination_path:
            # Use the name of the file object (if it has one) as reference.
            try:
                self.destination_path = self.destination.name
            except AttributeError:
                pass

    def open(self):
        """Open `self.destination_path`; raise `OutputError` on failure."""
        kwargs = {}
        # Specify encoding in Python 3.
        if sys.version_info >= (3, 0) and 'b' not in self.mode:
            kwargs = {'encoding': self.encoding,
                      'errors': self.error_handler}
        try:
            self.destination = open(self.destination_path, self.mode, **kwargs)
        except IOError as error:
            raise OutputError(error.errno, error.strerror,
                              self.destination_path)
        self.opened = True

    def write(self, data):
        """Encode `data`, write it to a single file, and return it.

        With Python 3 or binary output mode, `data` is returned unchanged,
        except when specified encoding and output encoding differ.
        """
        if not self.opened:
            self.open()
        py3 = sys.version_info >= (3, 0)
        text_mode_py2 = 'b' not in self.mode and not py3
        if (text_mode_py2
            or check_encoding(self.destination, self.encoding) is False):
            if py3 and os.linesep != '\n':
                data = data.replace('\n', os.linesep)  # fix endings
            data = self.encode(data)

        try:
            self.destination.write(data)
        except TypeError as e:
            # NOTE(review): as reconstructed from the flattened source, a
            # TypeError outside the Python 3 bytes case is not re-raised
            # here -- confirm against the upstream indentation.
            if py3 and isinstance(data, bytes):
                try:
                    # bytes cannot go to a text stream; try its byte buffer
                    self.destination.buffer.write(data)
                except AttributeError:
                    if check_encoding(self.destination,
                                      self.encoding) is not False:
                        raise e
                    raise ValueError('Encoding of %s (%s) differs \n'
                                     ' from specified encoding (%s)' %
                                     (self.destination_path or 'destination',
                                      self.destination.encoding,
                                      self.encoding))
        except (UnicodeError, LookupError) as err:
            raise UnicodeError(
                'Unable to encode output data. output-encoding is: '
                '%s.\n(%s)' % (self.encoding, ErrorString(err)))
        finally:
            if self.autoclose:
                self.close()
        return data

    def close(self):
        """Close the destination, unless it is `sys.stdout`/`sys.stderr`."""
        if self.destination not in (sys.stdout, sys.stderr):
            self.destination.close()
            self.opened = False
405 | |
406 | |
class BinaryFileOutput(FileOutput):
    """
    Variant of docutils.io.FileOutput whose destination is opened in
    binary mode ('wb').
    """
    # Used by core.publish_cmdline_to_binary() which in turn is used by
    # rst2odt (OpenOffice writer)
    mode = 'wb'
414 | |
415 | |
class StringInput(Input):

    """
    Direct string input: the source is the string itself.
    """

    default_source_path = '<string>'

    def read(self):
        """Decode and return the source string."""
        text = self.decode(self.source)
        return text
427 | |
428 | |
class StringOutput(Output):

    """
    Direct string output: the result is kept in `self.destination`.
    """

    default_destination_path = '<string>'

    def write(self, data):
        """Encode `data`, store it in `self.destination`, and return it."""
        encoded = self.encode(data)
        self.destination = encoded
        return encoded
441 | |
442 | |
class NullInput(Input):

    """
    Degenerate input: read nothing.
    """

    default_source_path = 'null input'

    def read(self):
        """Return an empty Unicode string."""
        nothing = u''
        return nothing
454 | |
455 | |
class NullOutput(Output):

    """
    Degenerate output: write nothing.
    """

    default_destination_path = 'null output'

    def write(self, data):
        """Discard `data`: nothing is written, nothing is returned."""
        return None
467 | |
468 | |
class DocTreeInput(Input):

    """
    Adapter for document tree input.

    The document tree must be passed in the ``source`` parameter.
    """

    default_source_path = 'doctree input'

    def read(self):
        """Return the document tree (the `source` object, unchanged)."""
        doctree = self.source
        return doctree