Mercurial > repos > bcclaywell > argo_navis
comparison venv/lib/python2.7/site-packages/docutils/io.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
| author | bcclaywell |
|---|---|
| date | Mon, 12 Oct 2015 17:43:33 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d67268158946 |
|---|---|
| 1 # $Id: io.py 7596 2013-01-25 13:42:17Z milde $ | |
| 2 # Author: David Goodger <goodger@python.org> | |
| 3 # Copyright: This module has been placed in the public domain. | |
| 4 | |
| 5 """ | |
| 6 I/O classes provide a uniform API for low-level input and output. Subclasses | |
| 7 exist for a variety of input/output mechanisms. | |
| 8 """ | |
| 9 | |
| 10 __docformat__ = 'reStructuredText' | |
| 11 | |
| 12 import sys | |
| 13 import os | |
| 14 import re | |
| 15 import codecs | |
| 16 from docutils import TransformSpec | |
| 17 from docutils._compat import b | |
| 18 from docutils.utils.error_reporting import locale_encoding, ErrorString, ErrorOutput | |
| 19 | |
| 20 | |
class InputError(IOError):
    """I/O error concerning an input source (e.g. a file that cannot be
    opened for reading)."""
class OutputError(IOError):
    """I/O error concerning an output destination (e.g. a file that cannot
    be opened for writing)."""
| 23 | |
def check_encoding(stream, encoding):
    """Compare the encoding of `stream` with `encoding`.

    Both names are resolved with `codecs.lookup`, so different spellings
    of the same codec (e.g. "utf8" and "UTF-8") compare equal.

    Returns

    :True:  if `stream.encoding` and `encoding` resolve to the same codec,
    :False: if they resolve to different codecs,
    :None:  if no comparison is possible: `stream` has no ``encoding``
            attribute, or one of the two values is not a valid encoding
            argument (e.g. ``None`` or an unknown name).
    """
    try:
        stream_codec = codecs.lookup(stream.encoding)
        requested_codec = codecs.lookup(encoding)
    except (LookupError, AttributeError, TypeError):
        return None
    return stream_codec == requested_codec
| 38 | |
| 39 | |
| 40 class Input(TransformSpec): | |
| 41 | |
| 42 """ | |
| 43 Abstract base class for input wrappers. | |
| 44 """ | |
| 45 | |
| 46 component_type = 'input' | |
| 47 | |
| 48 default_source_path = None | |
| 49 | |
| 50 def __init__(self, source=None, source_path=None, encoding=None, | |
| 51 error_handler='strict'): | |
| 52 self.encoding = encoding | |
| 53 """Text encoding for the input source.""" | |
| 54 | |
| 55 self.error_handler = error_handler | |
| 56 """Text decoding error handler.""" | |
| 57 | |
| 58 self.source = source | |
| 59 """The source of input data.""" | |
| 60 | |
| 61 self.source_path = source_path | |
| 62 """A text reference to the source.""" | |
| 63 | |
| 64 if not source_path: | |
| 65 self.source_path = self.default_source_path | |
| 66 | |
| 67 self.successful_encoding = None | |
| 68 """The encoding that successfully decoded the source data.""" | |
| 69 | |
| 70 def __repr__(self): | |
| 71 return '%s: source=%r, source_path=%r' % (self.__class__, self.source, | |
| 72 self.source_path) | |
| 73 | |
| 74 def read(self): | |
| 75 raise NotImplementedError | |
| 76 | |
| 77 def decode(self, data): | |
| 78 """ | |
| 79 Decode a string, `data`, heuristically. | |
| 80 Raise UnicodeError if unsuccessful. | |
| 81 | |
| 82 The client application should call ``locale.setlocale`` at the | |
| 83 beginning of processing:: | |
| 84 | |
| 85 locale.setlocale(locale.LC_ALL, '') | |
| 86 """ | |
| 87 if self.encoding and self.encoding.lower() == 'unicode': | |
| 88 assert isinstance(data, unicode), ( | |
| 89 'input encoding is "unicode" ' | |
| 90 'but input is not a unicode object') | |
| 91 if isinstance(data, unicode): | |
| 92 # Accept unicode even if self.encoding != 'unicode'. | |
| 93 return data | |
| 94 if self.encoding: | |
| 95 # We believe the user/application when the encoding is | |
| 96 # explicitly given. | |
| 97 encodings = [self.encoding] | |
| 98 else: | |
| 99 data_encoding = self.determine_encoding_from_data(data) | |
| 100 if data_encoding: | |
| 101 # If the data declares its encoding (explicitly or via a BOM), | |
| 102 # we believe it. | |
| 103 encodings = [data_encoding] | |
| 104 else: | |
| 105 # Apply heuristics only if no encoding is explicitly given and | |
| 106 # no BOM found. Start with UTF-8, because that only matches | |
| 107 # data that *IS* UTF-8: | |
| 108 encodings = ['utf-8', 'latin-1'] | |
| 109 if locale_encoding: | |
| 110 encodings.insert(1, locale_encoding) | |
| 111 for enc in encodings: | |
| 112 try: | |
| 113 decoded = unicode(data, enc, self.error_handler) | |
| 114 self.successful_encoding = enc | |
| 115 # Return decoded, removing BOMs. | |
| 116 return decoded.replace(u'\ufeff', u'') | |
| 117 except (UnicodeError, LookupError), err: | |
| 118 error = err # in Python 3, the <exception instance> is | |
| 119 # local to the except clause | |
| 120 raise UnicodeError( | |
| 121 'Unable to decode input data. Tried the following encodings: ' | |
| 122 '%s.\n(%s)' % (', '.join([repr(enc) for enc in encodings]), | |
| 123 ErrorString(error))) | |
| 124 | |
| 125 coding_slug = re.compile(b("coding[:=]\s*([-\w.]+)")) | |
| 126 """Encoding declaration pattern.""" | |
| 127 | |
| 128 byte_order_marks = ((codecs.BOM_UTF8, 'utf-8'), # 'utf-8-sig' new in v2.5 | |
| 129 (codecs.BOM_UTF16_BE, 'utf-16-be'), | |
| 130 (codecs.BOM_UTF16_LE, 'utf-16-le'),) | |
| 131 """Sequence of (start_bytes, encoding) tuples for encoding detection. | |
| 132 The first bytes of input data are checked against the start_bytes strings. | |
| 133 A match indicates the given encoding.""" | |
| 134 | |
| 135 def determine_encoding_from_data(self, data): | |
| 136 """ | |
| 137 Try to determine the encoding of `data` by looking *in* `data`. | |
| 138 Check for a byte order mark (BOM) or an encoding declaration. | |
| 139 """ | |
| 140 # check for a byte order mark: | |
| 141 for start_bytes, encoding in self.byte_order_marks: | |
| 142 if data.startswith(start_bytes): | |
| 143 return encoding | |
| 144 # check for an encoding declaration pattern in first 2 lines of file: | |
| 145 for line in data.splitlines()[:2]: | |
| 146 match = self.coding_slug.search(line) | |
| 147 if match: | |
| 148 return match.group(1).decode('ascii') | |
| 149 return None | |
| 150 | |
| 151 | |
class Output(TransformSpec):

    """
    Abstract base class for output wrappers.

    Concrete subclasses provide `write()`; `encode()` converts Unicode
    data to the output encoding.
    """

    component_type = 'output'

    default_destination_path = None

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict'):
        # Text encoding for the output destination.
        self.encoding = encoding

        # Text encoding error handler; a false value means 'strict'.
        self.error_handler = error_handler or 'strict'

        # The destination for output data.
        self.destination = destination

        # A text reference to the destination; an empty/missing value
        # falls back to the class-level default.
        self.destination_path = (destination_path
                                 or self.default_destination_path)

    def __repr__(self):
        details = (self.__class__, self.destination, self.destination_path)
        return '%s: destination=%r, destination_path=%r' % details

    def write(self, data):
        """`data` is a Unicode string, to be encoded by `self.encode`."""
        raise NotImplementedError

    def encode(self, data):
        """
        Return `data` prepared for output.

        With the pseudo-encoding "unicode", `data` must be a Unicode
        string and is returned as is.  Otherwise a Unicode string is
        encoded with `self.encoding` and `self.error_handler`, and any
        other object (e.g. bytes) is returned unchanged.
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data, unicode), (
                'the encoding given is "unicode" but the output is not '
                'a Unicode string')
            return data
        if isinstance(data, unicode):
            return data.encode(self.encoding, self.error_handler)
        # Non-unicode (e.g. bytes) output.
        return data
| 198 | |
| 199 | |
| 200 class FileInput(Input): | |
| 201 | |
| 202 """ | |
| 203 Input for single, simple file-like objects. | |
| 204 """ | |
| 205 def __init__(self, source=None, source_path=None, | |
| 206 encoding=None, error_handler='strict', | |
| 207 autoclose=True, handle_io_errors=None, mode='rU'): | |
| 208 """ | |
| 209 :Parameters: | |
| 210 - `source`: either a file-like object (which is read directly), or | |
| 211 `None` (which implies `sys.stdin` if no `source_path` given). | |
| 212 - `source_path`: a path to a file, which is opened and then read. | |
| 213 - `encoding`: the expected text encoding of the input file. | |
| 214 - `error_handler`: the encoding error handler to use. | |
| 215 - `autoclose`: close automatically after read (except when | |
| 216 `sys.stdin` is the source). | |
| 217 - `handle_io_errors`: ignored, deprecated, will be removed. | |
| 218 - `mode`: how the file is to be opened (see standard function | |
| 219 `open`). The default 'rU' provides universal newline support | |
| 220 for text files. | |
| 221 """ | |
| 222 Input.__init__(self, source, source_path, encoding, error_handler) | |
| 223 self.autoclose = autoclose | |
| 224 self._stderr = ErrorOutput() | |
| 225 | |
| 226 if source is None: | |
| 227 if source_path: | |
| 228 # Specify encoding in Python 3 | |
| 229 if sys.version_info >= (3,0): | |
| 230 kwargs = {'encoding': self.encoding, | |
| 231 'errors': self.error_handler} | |
| 232 else: | |
| 233 kwargs = {} | |
| 234 | |
| 235 try: | |
| 236 self.source = open(source_path, mode, **kwargs) | |
| 237 except IOError, error: | |
| 238 raise InputError(error.errno, error.strerror, source_path) | |
| 239 else: | |
| 240 self.source = sys.stdin | |
| 241 elif (sys.version_info >= (3,0) and | |
| 242 check_encoding(self.source, self.encoding) is False): | |
| 243 # TODO: re-open, warn or raise error? | |
| 244 raise UnicodeError('Encoding clash: encoding given is "%s" ' | |
| 245 'but source is opened with encoding "%s".' % | |
| 246 (self.encoding, self.source.encoding)) | |
| 247 if not source_path: | |
| 248 try: | |
| 249 self.source_path = self.source.name | |
| 250 except AttributeError: | |
| 251 pass | |
| 252 | |
| 253 def read(self): | |
| 254 """ | |
| 255 Read and decode a single file and return the data (Unicode string). | |
| 256 """ | |
| 257 try: # In Python < 2.5, try...except has to be nested in try...finally. | |
| 258 try: | |
| 259 if self.source is sys.stdin and sys.version_info >= (3,0): | |
| 260 # read as binary data to circumvent auto-decoding | |
| 261 data = self.source.buffer.read() | |
| 262 # normalize newlines | |
| 263 data = b('\n').join(data.splitlines()) + b('\n') | |
| 264 else: | |
| 265 data = self.source.read() | |
| 266 except (UnicodeError, LookupError), err: # (in Py3k read() decodes) | |
| 267 if not self.encoding and self.source_path: | |
| 268 # re-read in binary mode and decode with heuristics | |
| 269 b_source = open(self.source_path, 'rb') | |
| 270 data = b_source.read() | |
| 271 b_source.close() | |
| 272 # normalize newlines | |
| 273 data = b('\n').join(data.splitlines()) + b('\n') | |
| 274 else: | |
| 275 raise | |
| 276 finally: | |
| 277 if self.autoclose: | |
| 278 self.close() | |
| 279 return self.decode(data) | |
| 280 | |
| 281 def readlines(self): | |
| 282 """ | |
| 283 Return lines of a single file as list of Unicode strings. | |
| 284 """ | |
| 285 return self.read().splitlines(True) | |
| 286 | |
| 287 def close(self): | |
| 288 if self.source is not sys.stdin: | |
| 289 self.source.close() | |
| 290 | |
| 291 | |
class FileOutput(Output):

    """
    Output for single, simple file-like objects.

    A destination given only by path is opened lazily, on the first
    `write()` call.
    """

    mode = 'w'
    """The mode argument for `open()`."""
    # 'wb' for binary (e.g. OpenOffice) files (see also `BinaryFileOutput`).
    # (Do not use binary mode ('wb') for text files, as this prevents the
    # conversion of newlines to the system specific default.)

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict', autoclose=True,
                 handle_io_errors=None, mode=None):
        """
        :Parameters:
            - `destination`: either a file-like object (which is written
              directly) or `None` (which implies `sys.stdout` if no
              `destination_path` given).
            - `destination_path`: a path to a file, which is opened and then
              written.
            - `encoding`: the text encoding of the output file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after write (except when
              `sys.stdout` or `sys.stderr` is the destination).
            - `handle_io_errors`: ignored, deprecated, will be removed.
            - `mode`: how the file is to be opened (see standard function
              `open`). The default is 'w', providing universal newline
              support for text files.
        """
        Output.__init__(self, destination, destination_path,
                        encoding, error_handler)
        # `opened` stays True unless the destination still has to be
        # opened from `destination_path` (see below).
        self.opened = True
        self.autoclose = autoclose
        if mode is not None:
            # An explicit mode overrides the class-level default.
            self.mode = mode
        self._stderr = ErrorOutput()
        if destination is None:
            if destination_path:
                # Defer opening the file until `write()` is called.
                self.opened = False
            else:
                self.destination = sys.stdout
        elif (# destination is file-type object -> check mode:
              mode and hasattr(self.destination, 'mode')
              and mode != self.destination.mode):
            # Only a warning is emitted; the destination is used as given.
            print >>self._stderr, ('Warning: Destination mode "%s" '
                                   'differs from specified mode "%s"' %
                                   (self.destination.mode, mode))
        if not destination_path:
            # Fall back to the name of the file object, if it has one.
            try:
                self.destination_path = self.destination.name
            except AttributeError:
                pass

    def open(self):
        """
        Open the file at `self.destination_path` with `self.mode` and
        store it in `self.destination`.

        Raises `OutputError` (carrying errno, strerror and the path) if
        the file cannot be opened.
        """
        # Specify encoding in Python 3.
        if sys.version_info >= (3,0) and 'b' not in self.mode:
            kwargs = {'encoding': self.encoding,
                      'errors': self.error_handler}
        else:
            kwargs = {}
        try:
            self.destination = open(self.destination_path, self.mode, **kwargs)
        except IOError, error:
            raise OutputError(error.errno, error.strerror,
                              self.destination_path)
        self.opened = True

    def write(self, data):
        """Encode `data`, write it to a single file, and return it.

        With Python 3 or binary output mode, `data` is returned unchanged,
        except when specified encoding and output encoding differ.

        Raises `UnicodeError` if writing fails with a UnicodeError or
        LookupError, and `ValueError` (Python 3 only) if bytes cannot be
        written because the destination's encoding differs from
        `self.encoding` and it offers no ``buffer`` attribute.
        """
        if not self.opened:
            self.open()
        # Encode here if this is text mode under Python 2, or if the
        # destination reports an encoding different from `self.encoding`.
        # (`and` binds tighter than `or`.)
        if ('b' not in self.mode and sys.version_info < (3,0)
            or check_encoding(self.destination, self.encoding) is False
           ):
            if sys.version_info >= (3,0) and os.linesep != '\n':
                data = data.replace('\n', os.linesep) # fix endings
            data = self.encode(data)

        try: # In Python < 2.5, try...except has to be nested in try...finally.
            try:
                self.destination.write(data)
            except TypeError, e:
                # Python 3: a text stream rejects bytes; try to write them
                # to the underlying binary buffer instead.
                # NOTE(review): as written, a TypeError outside that case
                # is not re-raised -- confirm this is intended.
                if sys.version_info >= (3,0) and isinstance(data, bytes):
                    try:
                        self.destination.buffer.write(data)
                    except AttributeError:
                        # No ``buffer`` attribute: report an encoding
                        # mismatch if there is one, else re-raise the
                        # original TypeError.
                        if check_encoding(self.destination,
                                          self.encoding) is False:
                            raise ValueError('Encoding of %s (%s) differs \n'
                                ' from specified encoding (%s)' %
                                (self.destination_path or 'destination',
                                self.destination.encoding, self.encoding))
                        else:
                            raise e
            except (UnicodeError, LookupError), err:
                raise UnicodeError(
                    'Unable to encode output data. output-encoding is: '
                    '%s.\n(%s)' % (self.encoding, ErrorString(err)))
        finally:
            if self.autoclose:
                self.close()
        return data

    def close(self):
        """
        Close the destination, unless it is `sys.stdout` or `sys.stderr`,
        and mark this output as not opened.
        """
        if self.destination not in (sys.stdout, sys.stderr):
            self.destination.close()
            self.opened = False
| 405 | |
| 406 | |
class BinaryFileOutput(FileOutput):
    """
    Variant of docutils.io.FileOutput that opens its destination in
    binary mode ('wb').
    """
    # core.publish_cmdline_to_binary() uses this class; that function is
    # in turn used by rst2odt (OpenOffice writer).
    mode = 'wb'
| 414 | |
| 415 | |
class StringInput(Input):

    """
    Input taken directly from a string passed as `source`.
    """

    default_source_path = '<string>'

    def read(self):
        """Return the source string, decoded with `self.decode`."""
        raw = self.source
        return self.decode(raw)
| 427 | |
| 428 | |
class StringOutput(Output):

    """
    Output kept directly in a string (stored in `self.destination`).
    """

    default_destination_path = '<string>'

    def write(self, data):
        """Encode `data`, keep the result in `self.destination`, and
        return it."""
        encoded = self.encode(data)
        self.destination = encoded
        return encoded
| 441 | |
| 442 | |
class NullInput(Input):

    """
    Degenerate input that never reads anything.
    """

    default_source_path = 'null input'

    def read(self):
        """Return an empty Unicode string."""
        return u''
| 454 | |
| 455 | |
class NullOutput(Output):

    """
    Degenerate output that never writes anything.
    """

    default_destination_path = 'null output'

    def write(self, data):
        """Discard `data` without sending it anywhere; return None."""
        return None
| 467 | |
| 468 | |
class DocTreeInput(Input):

    """
    Adapter that hands an existing document tree through as input.

    The document tree must be passed in the ``source`` parameter.
    """

    default_source_path = 'doctree input'

    def read(self):
        """Return the document tree stored in `self.source`, unchanged."""
        doctree = self.source
        return doctree
