comparison venv/lib/python2.7/site-packages/docutils/parsers/rst/states.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author: bcclaywell
date: Mon, 12 Oct 2015 17:43:33 -0400
parents:
children:
comparison: -1:000000000000 → 0:d67268158946
1 # $Id: states.py 7640 2013-03-25 20:57:52Z milde $ | |
2 # Author: David Goodger <goodger@python.org> | |
3 # Copyright: This module has been placed in the public domain. | |
4 | |
5 """ | |
6 This is the ``docutils.parsers.rst.states`` module, the core of | |
7 the reStructuredText parser. It defines the following: | |
8 | |
9 :Classes: | |
10 - `RSTStateMachine`: reStructuredText parser's entry point. | |
11 - `NestedStateMachine`: recursive StateMachine. | |
12 - `RSTState`: reStructuredText State superclass. | |
13 - `Inliner`: For parsing inline markup. | |
14 - `Body`: Generic classifier of the first line of a block. | |
15 - `SpecializedBody`: Superclass for compound element members. | |
16 - `BulletList`: Second and subsequent bullet_list list_items | |
17 - `DefinitionList`: Second+ definition_list_items. | |
18 - `EnumeratedList`: Second+ enumerated_list list_items. | |
19 - `FieldList`: Second+ fields. | |
20 - `OptionList`: Second+ option_list_items. | |
21 - `RFC2822List`: Second+ RFC2822-style fields. | |
22 - `ExtensionOptions`: Parses directive option fields. | |
23 - `Explicit`: Second+ explicit markup constructs. | |
24 - `SubstitutionDef`: For embedded directives in substitution definitions. | |
25 - `Text`: Classifier of second line of a text block. | |
26 - `SpecializedText`: Superclass for continuation lines of Text-variants. | |
27 - `Definition`: Second line of potential definition_list_item. | |
28 - `Line`: Second line of overlined section title or transition marker. | |
29 - `Struct`: An auxiliary collection class. | |
30 | |
31 :Exception classes: | |
32 - `MarkupError` | |
33 - `ParserError` | |
34 - `MarkupMismatch` | |
35 | |
36 :Functions: | |
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls. | |
38 - `unescape()`: Return a string, nulls removed or restored to backslashes. | |
39 | |
40 :Attributes: | |
41 - `state_classes`: set of State classes used with `RSTStateMachine`. | |
42 | |
43 Parser Overview | |
44 =============== | |
45 | |
46 The reStructuredText parser is implemented as a recursive state machine, | |
47 examining its input one line at a time. To understand how the parser works, | |
48 please first become familiar with the `docutils.statemachine` module. In the | |
49 description below, references are made to classes defined in this module; | |
50 please see the individual classes for details. | |
51 | |
52 Parsing proceeds as follows: | |
53 | |
54 1. The state machine examines each line of input, checking each of the | |
55 transition patterns of the state `Body`, in order, looking for a match. | |
56 The implicit transitions (blank lines and indentation) are checked before | |
57 any others. The 'text' transition is a catch-all (matches anything). | |
58 | |
59 2. The method associated with the matched transition pattern is called. | |
60 | |
61 A. Some transition methods are self-contained, appending elements to the | |
62 document tree (`Body.doctest` parses a doctest block). The parser's | |
63 current line index is advanced to the end of the element, and parsing | |
64 continues with step 1. | |
65 | |
66 B. Other transition methods trigger the creation of a nested state machine, | |
67 whose job is to parse a compound construct ('indent' does a block quote, | |
68 'bullet' does a bullet list, 'overline' does a section [first checking | |
69 for a valid section header], etc.). | |
70 | |
71 - In the case of lists and explicit markup, a one-off state machine is | |
72 created and run to parse contents of the first item. | |
73 | |
74 - A new state machine is created and its initial state is set to the | |
75 appropriate specialized state (`BulletList` in the case of the | |
76 'bullet' transition; see `SpecializedBody` for more detail). This | |
77 state machine is run to parse the compound element (or series of | |
78 explicit markup elements), and returns as soon as a non-member element | |
79 is encountered. For example, the `BulletList` state machine ends as | |
80 soon as it encounters an element which is not a list item of that | |
81 bullet list. The optional omission of inter-element blank lines is | |
82 enabled by this nested state machine. | |
83 | |
84 - The current line index is advanced to the end of the elements parsed, | |
85 and parsing continues with step 1. | |
86 | |
87 C. The result of the 'text' transition depends on the next line of text. | |
88 The current state is changed to `Text`, under which the second line is | |
89 examined. If the second line is: | |
90 | |
91 - Indented: The element is a definition list item, and parsing proceeds | |
92 similarly to step 2.B, using the `DefinitionList` state. | |
93 | |
94 - A line of uniform punctuation characters: The element is a section | |
95 header; again, parsing proceeds as in step 2.B, and `Body` is still | |
96 used. | |
97 | |
98 - Anything else: The element is a paragraph, which is examined for | |
99 inline markup and appended to the parent element. Processing | |
100 continues with step 1. | |
101 """ | |
102 | |
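# A minimal usage sketch (assuming only the standard public docutils API;
# the source name '<sketch>' and the sample text are illustrative):
#
#     from docutils.frontend import OptionParser
#     from docutils.parsers.rst import Parser
#     from docutils.utils import new_document
#
#     parser = Parser()
#     settings = OptionParser(components=(Parser,)).get_default_values()
#     document = new_document('<sketch>', settings)
#     parser.parse(u'A *sample* paragraph.\n', document)
#
# Parser.parse() drives the RSTStateMachine defined below over the input
# lines and populates `document` in place.
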
103 __docformat__ = 'reStructuredText' | |
104 | |
105 | |
106 import sys | |
107 import re | |
108 from types import FunctionType, MethodType | |
109 | |
110 from docutils import nodes, statemachine, utils | |
111 from docutils import ApplicationError, DataError | |
112 from docutils.statemachine import StateMachineWS, StateWS | |
113 from docutils.nodes import fully_normalize_name as normalize_name | |
114 from docutils.nodes import whitespace_normalize_name | |
115 import docutils.parsers.rst | |
116 from docutils.parsers.rst import directives, languages, tableparser, roles | |
117 from docutils.parsers.rst.languages import en as _fallback_language_module | |
118 from docutils.utils import escape2null, unescape, column_width | |
119 from docutils.utils import punctuation_chars, roman, urischemes | |
120 | |
121 class MarkupError(DataError): pass | |
122 class UnknownInterpretedRoleError(DataError): pass | |
123 class InterpretedRoleNotImplementedError(DataError): pass | |
124 class ParserError(ApplicationError): pass | |
125 class MarkupMismatch(Exception): pass | |
126 | |
127 | |
128 class Struct: | |
129 | |
130 """Stores data attributes for dotted-attribute access.""" | |
131 | |
132 def __init__(self, **keywordargs): | |
133 self.__dict__.update(keywordargs) | |
134 | |
135 | |
136 class RSTStateMachine(StateMachineWS): | |
137 | |
138 """ | |
139 reStructuredText's master StateMachine. | |
140 | |
141 The entry point to reStructuredText parsing is the `run()` method. | |
142 """ | |
143 | |
144 def run(self, input_lines, document, input_offset=0, match_titles=True, | |
145 inliner=None): | |
146 """ | |
147 Parse `input_lines` and modify the `document` node in place. | |
148 | |
149 Extend `StateMachineWS.run()`: set up parse-global data and | |
150 run the StateMachine. | |
151 """ | |
152 self.language = languages.get_language( | |
153 document.settings.language_code) | |
154 self.match_titles = match_titles | |
155 if inliner is None: | |
156 inliner = Inliner() | |
157 inliner.init_customizations(document.settings) | |
158 self.memo = Struct(document=document, | |
159 reporter=document.reporter, | |
160 language=self.language, | |
161 title_styles=[], | |
162 section_level=0, | |
163 section_bubble_up_kludge=False, | |
164 inliner=inliner) | |
165 self.document = document | |
166 self.attach_observer(document.note_source) | |
167 self.reporter = self.memo.reporter | |
168 self.node = document | |
169 results = StateMachineWS.run(self, input_lines, input_offset, | |
170 input_source=document['source']) | |
171 assert results == [], 'RSTStateMachine.run() results should be empty!' | |
172 self.node = self.memo = None # remove unneeded references | |
173 | |
174 | |
175 class NestedStateMachine(StateMachineWS): | |
176 | |
177 """ | |
178 StateMachine run from within other StateMachine runs, to parse nested | |
179 document structures. | |
180 """ | |
181 | |
182 def run(self, input_lines, input_offset, memo, node, match_titles=True): | |
183 """ | |
184 Parse `input_lines` and populate a `docutils.nodes.document` instance. | |
185 | |
186 Extend `StateMachineWS.run()`: set up document-wide data. | |
187 """ | |
188 self.match_titles = match_titles | |
189 self.memo = memo | |
190 self.document = memo.document | |
191 self.attach_observer(self.document.note_source) | |
192 self.reporter = memo.reporter | |
193 self.language = memo.language | |
194 self.node = node | |
195 results = StateMachineWS.run(self, input_lines, input_offset) | |
196 assert results == [], ('NestedStateMachine.run() results should be ' | |
197 'empty!') | |
198 return results | |
199 | |
200 | |
201 class RSTState(StateWS): | |
202 | |
203 """ | |
204 reStructuredText State superclass. | |
205 | |
206 Contains methods used by all State subclasses. | |
207 """ | |
208 | |
209 nested_sm = NestedStateMachine | |
210 nested_sm_cache = [] | |
211 | |
212 def __init__(self, state_machine, debug=False): | |
213 self.nested_sm_kwargs = {'state_classes': state_classes, | |
214 'initial_state': 'Body'} | |
215 StateWS.__init__(self, state_machine, debug) | |
216 | |
217 def runtime_init(self): | |
218 StateWS.runtime_init(self) | |
219 memo = self.state_machine.memo | |
220 self.memo = memo | |
221 self.reporter = memo.reporter | |
222 self.inliner = memo.inliner | |
223 self.document = memo.document | |
224 self.parent = self.state_machine.node | |
225 # enable the reporter to determine source and source-line | |
226 if not hasattr(self.reporter, 'get_source_and_line'): | |
227 self.reporter.get_source_and_line = self.state_machine.get_source_and_line | |
228 # print "adding get_source_and_line to reporter", self.state_machine.input_offset | |
229 | |
230 | |
231 def goto_line(self, abs_line_offset): | |
232 """ | |
233 Jump to input line `abs_line_offset`, ignoring jumps past the end. | |
234 """ | |
235 try: | |
236 self.state_machine.goto_line(abs_line_offset) | |
237 except EOFError: | |
238 pass | |
239 | |
240 def no_match(self, context, transitions): | |
241 """ | |
242 Override `StateWS.no_match` to generate a system message. | |
243 | |
244 This code should never be run. | |
245 """ | |
246 self.reporter.severe( | |
247 'Internal error: no transition pattern match. State: "%s"; ' | |
248 'transitions: %s; context: %s; current line: %r.' | |
249 % (self.__class__.__name__, transitions, context, | |
250 self.state_machine.line)) | |
251 return context, None, [] | |
252 | |
253 def bof(self, context): | |
254 """Called at beginning of file.""" | |
255 return [], [] | |
256 | |
257 def nested_parse(self, block, input_offset, node, match_titles=False, | |
258 state_machine_class=None, state_machine_kwargs=None): | |
259 """ | |
260 Create a new StateMachine rooted at `node` and run it over the input | |
261 `block`. | |
262 """ | |
263 use_default = 0 | |
264 if state_machine_class is None: | |
265 state_machine_class = self.nested_sm | |
266 use_default += 1 | |
267 if state_machine_kwargs is None: | |
268 state_machine_kwargs = self.nested_sm_kwargs | |
269 use_default += 1 | |
270 block_length = len(block) | |
271 | |
272 state_machine = None | |
273 if use_default == 2: | |
274 try: | |
275 state_machine = self.nested_sm_cache.pop() | |
276 except IndexError: | |
277 pass | |
278 if not state_machine: | |
279 state_machine = state_machine_class(debug=self.debug, | |
280 **state_machine_kwargs) | |
281 state_machine.run(block, input_offset, memo=self.memo, | |
282 node=node, match_titles=match_titles) | |
283 if use_default == 2: | |
284 self.nested_sm_cache.append(state_machine) | |
285 else: | |
286 state_machine.unlink() | |
287 new_offset = state_machine.abs_line_offset() | |
288 # No `block.parent` implies disconnected -- lines aren't in sync: | |
289 if block.parent and (len(block) - block_length) != 0: | |
290 # Adjustment for block if modified in nested parse: | |
291 self.state_machine.next_line(len(block) - block_length) | |
292 return new_offset | |
293 | |
294 def nested_list_parse(self, block, input_offset, node, initial_state, | |
295 blank_finish, | |
296 blank_finish_state=None, | |
297 extra_settings={}, | |
298 match_titles=False, | |
299 state_machine_class=None, | |
300 state_machine_kwargs=None): | |
301 """ | |
302 Create a new StateMachine rooted at `node` and run it over the input | |
303 `block`. Also keep track of optional intermediate blank lines and the | |
304 required final one. | |
305 """ | |
306 if state_machine_class is None: | |
307 state_machine_class = self.nested_sm | |
308 if state_machine_kwargs is None: | |
309 state_machine_kwargs = self.nested_sm_kwargs.copy() | |
310 state_machine_kwargs['initial_state'] = initial_state | |
311 state_machine = state_machine_class(debug=self.debug, | |
312 **state_machine_kwargs) | |
313 if blank_finish_state is None: | |
314 blank_finish_state = initial_state | |
315 state_machine.states[blank_finish_state].blank_finish = blank_finish | |
316 for key, value in extra_settings.items(): | |
317 setattr(state_machine.states[initial_state], key, value) | |
318 state_machine.run(block, input_offset, memo=self.memo, | |
319 node=node, match_titles=match_titles) | |
320 blank_finish = state_machine.states[blank_finish_state].blank_finish | |
321 state_machine.unlink() | |
322 return state_machine.abs_line_offset(), blank_finish | |
323 | |
324 def section(self, title, source, style, lineno, messages): | |
325 """Check for a valid subsection and create one if it checks out.""" | |
326 if self.check_subsection(source, style, lineno): | |
327 self.new_subsection(title, lineno, messages) | |
328 | |
329 def check_subsection(self, source, style, lineno): | |
330 """ | |
331 Check for a valid subsection header. Return 1 (true) or None (false). | |
332 | |
333 When a new section is reached that isn't a subsection of the current | |
334 section, back up the line count (use ``previous_line(-x)``), then | |
335 ``raise EOFError``. The current StateMachine will finish, then the | |
336 calling StateMachine can re-examine the title. This will work its way | |
337 back up the calling chain until the correct section level is reached. | |
338 | |
339 @@@ Alternative: Evaluate the title, store the title info & level, and | |
340 back up the chain until that level is reached. Store in memo? Or | |
341 return in results? | |
342 | |
343 :Exception: `EOFError` when a sibling or supersection encountered. | |
344 """ | |
345 memo = self.memo | |
346 title_styles = memo.title_styles | |
347 mylevel = memo.section_level | |
348 try: # check for existing title style | |
349 level = title_styles.index(style) + 1 | |
350 except ValueError: # new title style | |
351 if len(title_styles) == memo.section_level: # new subsection | |
352 title_styles.append(style) | |
353 return 1 | |
354 else: # not at lowest level | |
355 self.parent += self.title_inconsistent(source, lineno) | |
356 return None | |
357 if level <= mylevel: # sibling or supersection | |
358 memo.section_level = level # bubble up to parent section | |
359 if len(style) == 2: | |
360 memo.section_bubble_up_kludge = True | |
361 # back up 2 lines for underline title, 3 for overline title | |
362 self.state_machine.previous_line(len(style) + 1) | |
363 raise EOFError # let parent section re-evaluate | |
364 if level == mylevel + 1: # immediate subsection | |
365 return 1 | |
366 else: # invalid subsection | |
367 self.parent += self.title_inconsistent(source, lineno) | |
368 return None | |
369 | |
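# For instance (illustrative underline styles): with memo.title_styles ==
# ['=', '-'] and memo.section_level == 2, another '-' underline is level 2
# (a sibling, so EOFError is raised to bubble up), while a new '~' underline
# is appended as level 3, an immediate subsection.
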
370 def title_inconsistent(self, sourcetext, lineno): | |
371 error = self.reporter.severe( | |
372 'Title level inconsistent:', nodes.literal_block('', sourcetext), | |
373 line=lineno) | |
374 return error | |
375 | |
376 def new_subsection(self, title, lineno, messages): | |
377 """Append new subsection to document tree. On return, check level.""" | |
378 memo = self.memo | |
379 mylevel = memo.section_level | |
380 memo.section_level += 1 | |
381 section_node = nodes.section() | |
382 self.parent += section_node | |
383 textnodes, title_messages = self.inline_text(title, lineno) | |
384 titlenode = nodes.title(title, '', *textnodes) | |
385 name = normalize_name(titlenode.astext()) | |
386 section_node['names'].append(name) | |
387 section_node += titlenode | |
388 section_node += messages | |
389 section_node += title_messages | |
390 self.document.note_implicit_target(section_node, section_node) | |
391 offset = self.state_machine.line_offset + 1 | |
392 absoffset = self.state_machine.abs_line_offset() + 1 | |
393 newabsoffset = self.nested_parse( | |
394 self.state_machine.input_lines[offset:], input_offset=absoffset, | |
395 node=section_node, match_titles=True) | |
396 self.goto_line(newabsoffset) | |
397 if memo.section_level <= mylevel: # can't handle next section? | |
398 raise EOFError # bubble up to supersection | |
399 # reset section_level; next pass will detect it properly | |
400 memo.section_level = mylevel | |
401 | |
402 def paragraph(self, lines, lineno): | |
403 """ | |
404 Return a list (paragraph & messages) & a boolean: literal_block next? | |
405 """ | |
406 data = '\n'.join(lines).rstrip() | |
407 if re.search(r'(?<!\\)(\\\\)*::$', data): | |
408 if len(data) == 2: | |
409 return [], 1 | |
410 elif data[-3] in ' \n': | |
411 text = data[:-3].rstrip() | |
412 else: | |
413 text = data[:-1] | |
414 literalnext = 1 | |
415 else: | |
416 text = data | |
417 literalnext = 0 | |
418 textnodes, messages = self.inline_text(text, lineno) | |
419 p = nodes.paragraph(data, '', *textnodes) | |
420 p.source, p.line = self.state_machine.get_source_and_line(lineno) | |
421 return [p] + messages, literalnext | |
422 | |
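# A sketch of the '::' handling above (inputs are illustrative):
#
#     paragraph(['Example::'], lineno)   -> paragraph text 'Example:', literal block next
#     paragraph(['Example ::'], lineno)  -> paragraph text 'Example',  literal block next
#     paragraph(['::'], lineno)          -> no paragraph node,         literal block next
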
423 def inline_text(self, text, lineno): | |
424 """ | |
425 Return 2 lists: nodes (text and inline elements), and system_messages. | |
426 """ | |
427 return self.inliner.parse(text, lineno, self.memo, self.parent) | |
428 | |
429 def unindent_warning(self, node_name): | |
430 # the actual problem is one line below the current line | |
431 lineno = self.state_machine.abs_line_number()+1 | |
432 return self.reporter.warning('%s ends without a blank line; ' | |
433 'unexpected unindent.' % node_name, | |
434 line=lineno) | |
435 | |
436 | |
437 def build_regexp(definition, compile=True): | |
438 """ | |
439 Build, compile and return a regular expression based on `definition`. | |
440 | |
441 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts), | |
442 where "parts" is a list of regular expressions and/or regular | |
443 expression definitions to be joined into an or-group. | |
444 """ | |
445 name, prefix, suffix, parts = definition | |
446 part_strings = [] | |
447 for part in parts: | |
448 if type(part) is tuple: | |
449 part_strings.append(build_regexp(part, None)) | |
450 else: | |
451 part_strings.append(part) | |
452 or_group = '|'.join(part_strings) | |
453 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals() | |
454 if compile: | |
455 return re.compile(regexp, re.UNICODE) | |
456 else: | |
457 return regexp | |
458 | |
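# A small sketch of how a nested `definition` expands (the group names here
# are illustrative, not ones used elsewhere in this module):
#
#     >>> build_regexp(('outer', '<', '>',
#     ...               ['foo', ('inner', '', '', ['a', 'b'])]), compile=False)
#     '<(?P<outer>foo|(?P<inner>a|b))>'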
459 | |
460 class Inliner: | |
461 | |
462 """ | |
463 Parse inline markup; call the `parse()` method. | |
464 """ | |
465 | |
466 def __init__(self): | |
467 self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),] | |
468 """List of (pattern, bound method) tuples, used by | |
469 `self.implicit_inline`.""" | |
470 | |
471 def init_customizations(self, settings): | |
472 """Setting-based customizations; run when parsing begins.""" | |
473 if settings.pep_references: | |
474 self.implicit_dispatch.append((self.patterns.pep, | |
475 self.pep_reference)) | |
476 if settings.rfc_references: | |
477 self.implicit_dispatch.append((self.patterns.rfc, | |
478 self.rfc_reference)) | |
479 | |
480 def parse(self, text, lineno, memo, parent): | |
481 # Needs to be refactored for nested inline markup. | |
482 # Add nested_parse() method? | |
483 """ | |
484 Return 2 lists: nodes (text and inline elements), and system_messages. | |
485 | |
486 Using `self.patterns.initial`, a pattern which matches start-strings | |
487 (emphasis, strong, interpreted, phrase reference, literal, | |
488 substitution reference, and inline target) and complete constructs | |
489 (simple reference, footnote reference), search for a candidate. When | |
490 one is found, check for validity (e.g., not a quoted '*' character). | |
491 If valid, search for the corresponding end string if applicable, and | |
492 check it for validity. If not found or invalid, generate a warning | |
493 and ignore the start-string. Implicit inline markup (e.g. standalone | |
494 URIs) is found last. | |
495 """ | |
496 self.reporter = memo.reporter | |
497 self.document = memo.document | |
498 self.language = memo.language | |
499 self.parent = parent | |
500 pattern_search = self.patterns.initial.search | |
501 dispatch = self.dispatch | |
502 remaining = escape2null(text) | |
503 processed = [] | |
504 unprocessed = [] | |
505 messages = [] | |
506 while remaining: | |
507 match = pattern_search(remaining) | |
508 if match: | |
509 groups = match.groupdict() | |
510 method = dispatch[groups['start'] or groups['backquote'] | |
511 or groups['refend'] or groups['fnend']] | |
512 before, inlines, remaining, sysmessages = method(self, match, | |
513 lineno) | |
514 unprocessed.append(before) | |
515 messages += sysmessages | |
516 if inlines: | |
517 processed += self.implicit_inline(''.join(unprocessed), | |
518 lineno) | |
519 processed += inlines | |
520 unprocessed = [] | |
521 else: | |
522 break | |
523 remaining = ''.join(unprocessed) + remaining | |
524 if remaining: | |
525 processed += self.implicit_inline(remaining, lineno) | |
526 return processed, messages | |
527 | |
528 # Inline object recognition | |
529 # ------------------------- | |
530 # lookahead and look-behind expressions for inline markup rules | |
531 start_string_prefix = (u'(^|(?<=\\s|[%s%s]))' % | |
532 (punctuation_chars.openers, | |
533 punctuation_chars.delimiters)) | |
534 end_string_suffix = (u'($|(?=\\s|[\x00%s%s%s]))' % | |
535 (punctuation_chars.closing_delimiters, | |
536 punctuation_chars.delimiters, | |
537 punctuation_chars.closers)) | |
538 # print start_string_prefix.encode('utf8') | |
539 # TODO: support non-ASCII whitespace in the following 4 patterns? | |
540 non_whitespace_before = r'(?<![ \n])' | |
541 non_whitespace_escape_before = r'(?<![ \n\x00])' | |
542 non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[ \n\x00])' | |
543 non_whitespace_after = r'(?![ \n])' | |
544 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together): | |
545 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*' | |
546 # Valid URI characters (see RFC 2396 & RFC 2732); | |
547 # final \x00 allows backslash escapes in URIs: | |
548 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]""" | |
549 # Delimiter indicating the end of a URI (not part of the URI): | |
550 uri_end_delim = r"""[>]""" | |
551 # Last URI character; same as uric but no punctuation: | |
552 urilast = r"""[_~*/=+a-zA-Z0-9]""" | |
553 # End of a URI (either 'urilast' or 'uric followed by a | |
554 # uri_end_delim'): | |
555 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals() | |
556 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]""" | |
557 email_pattern = r""" | |
558 %(emailc)s+(?:\.%(emailc)s+)* # name | |
559 (?<!\x00)@ # at | |
560 %(emailc)s+(?:\.%(emailc)s*)* # host | |
561 %(uri_end)s # final URI char | |
562 """ | |
563 parts = ('initial_inline', start_string_prefix, '', | |
564 [('start', '', non_whitespace_after, # simple start-strings | |
565 [r'\*\*', # strong | |
566 r'\*(?!\*)', # emphasis but not strong | |
567 r'``', # literal | |
568 r'_`', # inline internal target | |
569 r'\|(?!\|)'] # substitution reference | |
570 ), | |
571 ('whole', '', end_string_suffix, # whole constructs | |
572 [# reference name & end-string | |
573 r'(?P<refname>%s)(?P<refend>__?)' % simplename, | |
574 ('footnotelabel', r'\[', r'(?P<fnend>\]_)', | |
575 [r'[0-9]+', # manually numbered | |
576 r'\#(%s)?' % simplename, # auto-numbered (w/ label?) | |
577 r'\*', # auto-symbol | |
578 r'(?P<citationlabel>%s)' % simplename] # citation reference | |
579 ) | |
580 ] | |
581 ), | |
582 ('backquote', # interpreted text or phrase reference | |
583 '(?P<role>(:%s:)?)' % simplename, # optional role | |
584 non_whitespace_after, | |
585 ['`(?!`)'] # but not literal | |
586 ) | |
587 ] | |
588 ) | |
589 patterns = Struct( | |
590 initial=build_regexp(parts), | |
591 emphasis=re.compile(non_whitespace_escape_before | |
592 + r'(\*)' + end_string_suffix, re.UNICODE), | |
593 strong=re.compile(non_whitespace_escape_before | |
594 + r'(\*\*)' + end_string_suffix, re.UNICODE), | |
595 interpreted_or_phrase_ref=re.compile( | |
596 r""" | |
597 %(non_unescaped_whitespace_escape_before)s | |
598 ( | |
599 ` | |
600 (?P<suffix> | |
601 (?P<role>:%(simplename)s:)? | |
602 (?P<refend>__?)? | |
603 ) | |
604 ) | |
605 %(end_string_suffix)s | |
606 """ % locals(), re.VERBOSE | re.UNICODE), | |
607 embedded_link=re.compile( | |
608 r""" | |
609 ( | |
610 (?:[ \n]+|^) # spaces or beginning of line/string | |
611 < # open bracket | |
612 %(non_whitespace_after)s | |
613 ([^<>\x00]+(\x00_)?) # anything but angle brackets & nulls | |
614 # except escaped trailing low line | |
615 %(non_whitespace_before)s | |
616 > # close bracket w/o whitespace before | |
617 ) | |
618 $ # end of string | |
619 """ % locals(), re.VERBOSE | re.UNICODE), | |
620 literal=re.compile(non_whitespace_before + '(``)' | |
621 + end_string_suffix), | |
622 target=re.compile(non_whitespace_escape_before | |
623 + r'(`)' + end_string_suffix), | |
624 substitution_ref=re.compile(non_whitespace_escape_before | |
625 + r'(\|_{0,2})' | |
626 + end_string_suffix), | |
627 email=re.compile(email_pattern % locals() + '$', | |
628 re.VERBOSE | re.UNICODE), | |
629 uri=re.compile( | |
630 (r""" | |
631 %(start_string_prefix)s | |
632 (?P<whole> | |
633 (?P<absolute> # absolute URI | |
634 (?P<scheme> # scheme (http, ftp, mailto) | |
635 [a-zA-Z][a-zA-Z0-9.+-]* | |
636 ) | |
637 : | |
638 ( | |
639 ( # either: | |
640 (//?)? # hierarchical URI | |
641 %(uric)s* # URI characters | |
642 %(uri_end)s # final URI char | |
643 ) | |
644 ( # optional query | |
645 \?%(uric)s* | |
646 %(uri_end)s | |
647 )? | |
648 ( # optional fragment | |
649 \#%(uric)s* | |
650 %(uri_end)s | |
651 )? | |
652 ) | |
653 ) | |
654 | # *OR* | |
655 (?P<email> # email address | |
656 """ + email_pattern + r""" | |
657 ) | |
658 ) | |
659 %(end_string_suffix)s | |
660 """) % locals(), re.VERBOSE | re.UNICODE), | |
661 pep=re.compile( | |
662 r""" | |
663 %(start_string_prefix)s | |
664 ( | |
665 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file | |
666 | | |
667 (PEP\s+(?P<pepnum2>\d+)) # reference by name | |
668 ) | |
669 %(end_string_suffix)s""" % locals(), re.VERBOSE | re.UNICODE), | |
670 rfc=re.compile( | |
671 r""" | |
672 %(start_string_prefix)s | |
673 (RFC(-|\s+)?(?P<rfcnum>\d+)) | |
674 %(end_string_suffix)s""" % locals(), re.VERBOSE | re.UNICODE)) | |
675 | |
676 def quoted_start(self, match): | |
677 """Test if inline markup start-string is 'quoted'. | |
678 | |
679 'Quoted' in this context means the start-string is enclosed in a pair | |
680 of matching opening/closing delimiters (not necessarily quotes) | |
681 or at the end of the match. | |
682 """ | |
683 string = match.string | |
684 start = match.start() | |
685 if start == 0: # start-string at beginning of text | |
686 return False | |
687 prestart = string[start - 1] | |
688 try: | |
689 poststart = string[match.end()] | |
690 except IndexError: # start-string at end of text | |
691 return True # not "quoted" but no markup start-string either | |
692 return punctuation_chars.match_chars(prestart, poststart) | |
693 | |
694 def inline_obj(self, match, lineno, end_pattern, nodeclass, | |
695 restore_backslashes=False): | |
696 string = match.string | |
697 matchstart = match.start('start') | |
698 matchend = match.end('start') | |
699 if self.quoted_start(match): | |
700 return (string[:matchend], [], string[matchend:], [], '') | |
701 endmatch = end_pattern.search(string[matchend:]) | |
702 if endmatch and endmatch.start(1): # 1 or more chars | |
703 text = unescape(endmatch.string[:endmatch.start(1)], | |
704 restore_backslashes) | |
705 textend = matchend + endmatch.end(1) | |
706 rawsource = unescape(string[matchstart:textend], 1) | |
707 return (string[:matchstart], [nodeclass(rawsource, text)], | |
708 string[textend:], [], endmatch.group(1)) | |
709 msg = self.reporter.warning( | |
710 'Inline %s start-string without end-string.' | |
711 % nodeclass.__name__, line=lineno) | |
712 text = unescape(string[matchstart:matchend], 1) | |
713 rawsource = unescape(string[matchstart:matchend], 1) | |
714 prb = self.problematic(text, rawsource, msg) | |
715 return string[:matchstart], [prb], string[matchend:], [msg], '' | |
716 | |
717 def problematic(self, text, rawsource, message): | |
718 msgid = self.document.set_id(message, self.parent) | |
719 problematic = nodes.problematic(rawsource, text, refid=msgid) | |
720 prbid = self.document.set_id(problematic) | |
721 message.add_backref(prbid) | |
722 return problematic | |
723 | |
724 def emphasis(self, match, lineno): | |
725 before, inlines, remaining, sysmessages, endstring = self.inline_obj( | |
726 match, lineno, self.patterns.emphasis, nodes.emphasis) | |
727 return before, inlines, remaining, sysmessages | |
728 | |
729 def strong(self, match, lineno): | |
730 before, inlines, remaining, sysmessages, endstring = self.inline_obj( | |
731 match, lineno, self.patterns.strong, nodes.strong) | |
732 return before, inlines, remaining, sysmessages | |
733 | |
734 def interpreted_or_phrase_ref(self, match, lineno): | |
735 end_pattern = self.patterns.interpreted_or_phrase_ref | |
736 string = match.string | |
737 matchstart = match.start('backquote') | |
738 matchend = match.end('backquote') | |
739 rolestart = match.start('role') | |
740 role = match.group('role') | |
741 position = '' | |
742 if role: | |
743 role = role[1:-1] | |
744 position = 'prefix' | |
745 elif self.quoted_start(match): | |
746 return (string[:matchend], [], string[matchend:], []) | |
747 endmatch = end_pattern.search(string[matchend:]) | |
748 if endmatch and endmatch.start(1): # 1 or more chars | |
749 textend = matchend + endmatch.end() | |
750 if endmatch.group('role'): | |
751 if role: | |
752 msg = self.reporter.warning( | |
753 'Multiple roles in interpreted text (both ' | |
754 'prefix and suffix present; only one allowed).', | |
755 line=lineno) | |
756 text = unescape(string[rolestart:textend], 1) | |
757 prb = self.problematic(text, text, msg) | |
758 return string[:rolestart], [prb], string[textend:], [msg] | |
759 role = endmatch.group('suffix')[1:-1] | |
760 position = 'suffix' | |
761 escaped = endmatch.string[:endmatch.start(1)] | |
762 rawsource = unescape(string[matchstart:textend], 1) | |
763 if rawsource[-1:] == '_': | |
764 if role: | |
765 msg = self.reporter.warning( | |
766 'Mismatch: both interpreted text role %s and ' | |
767 'reference suffix.' % position, line=lineno) | |
768 text = unescape(string[rolestart:textend], 1) | |
769 prb = self.problematic(text, text, msg) | |
770 return string[:rolestart], [prb], string[textend:], [msg] | |
771 return self.phrase_ref(string[:matchstart], string[textend:], | |
772 rawsource, escaped, unescape(escaped)) | |
773 else: | |
774 rawsource = unescape(string[rolestart:textend], 1) | |
775 nodelist, messages = self.interpreted(rawsource, escaped, role, | |
776 lineno) | |
777 return (string[:rolestart], nodelist, | |
778 string[textend:], messages) | |
779 msg = self.reporter.warning( | |
780 'Inline interpreted text or phrase reference start-string ' | |
781 'without end-string.', line=lineno) | |
782 text = unescape(string[matchstart:matchend], 1) | |
783 prb = self.problematic(text, text, msg) | |
784 return string[:matchstart], [prb], string[matchend:], [msg] | |
785 | |
786 def phrase_ref(self, before, after, rawsource, escaped, text): | |
787 match = self.patterns.embedded_link.search(escaped) | |
788 if match: # embedded <URI> or <alias_> | |
789 text = unescape(escaped[:match.start(0)]) | |
790 aliastext = unescape(match.group(2), restore_backslashes=True) | |
791 if aliastext.endswith('_') and not (aliastext.endswith(r'\_') | |
792 or self.patterns.uri.match(aliastext)): | |
793 aliastype = 'name' | |
794 alias = normalize_name(aliastext[:-1]) | |
795 target = nodes.target(match.group(1), refname=alias) | |
796 target.indirect_reference_name = aliastext[:-1] | |
797 else: | |
798 aliastype = 'uri' | |
799 alias = ''.join(aliastext.split()) | |
800 alias = self.adjust_uri(alias) | |
801 if alias.endswith(r'\_'): | |
802 alias = alias[:-2] + '_' | |
803 target = nodes.target(match.group(1), refuri=alias) | |
804 target.referenced = 1 | |
805 if not aliastext: | |
806 raise ApplicationError('problem with embedded link: %r' | |
807 % aliastext) | |
808 if not text: | |
809 text = alias | |
810 else: | |
811 target = None | |
812 | |
813 refname = normalize_name(text) | |
814 reference = nodes.reference(rawsource, text, | |
815 name=whitespace_normalize_name(text)) | |
816 node_list = [reference] | |
817 | |
818 if rawsource[-2:] == '__': | |
819 if target and (aliastype == 'name'): | |
820 reference['refname'] = alias | |
821 self.document.note_refname(reference) | |
822 # self.document.note_indirect_target(target) # required? | |
823 elif target and (aliastype == 'uri'): | |
824 reference['refuri'] = alias | |
825 else: | |
826 reference['anonymous'] = 1 | |
827 else: | |
828 if target: | |
829 target['names'].append(refname) | |
830 if aliastype == 'name': | |
831 reference['refname'] = alias | |
832 self.document.note_indirect_target(target) | |
833 self.document.note_refname(reference) | |
834 else: | |
835 reference['refuri'] = alias | |
836 self.document.note_explicit_target(target, self.parent) | |
837 # target.note_referenced_by(name=refname) | |
838 node_list.append(target) | |
839 else: | |
840 reference['refname'] = refname | |
841 self.document.note_refname(reference) | |
842 return before, node_list, after, [] | |
843 | |
844 | |
845 def adjust_uri(self, uri): | |
846 match = self.patterns.email.match(uri) | |
847 if match: | |
848 return 'mailto:' + uri | |
849 else: | |
850 return uri | |
851 | |
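# For instance (illustrative input): adjust_uri('user@example.org') returns
# 'mailto:user@example.org'; anything that does not look like a bare email
# address is returned unchanged.
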
852 def interpreted(self, rawsource, text, role, lineno): | |
853 role_fn, messages = roles.role(role, self.language, lineno, | |
854 self.reporter) | |
855 if role_fn: | |
856 nodes, messages2 = role_fn(role, rawsource, text, lineno, self) | |
857 return nodes, messages + messages2 | |
858 else: | |
859 msg = self.reporter.error( | |
860 'Unknown interpreted text role "%s".' % role, | |
861 line=lineno) | |
862 return ([self.problematic(rawsource, rawsource, msg)], | |
863 messages + [msg]) | |
864 | |
865 def literal(self, match, lineno): | |
866 before, inlines, remaining, sysmessages, endstring = self.inline_obj( | |
867 match, lineno, self.patterns.literal, nodes.literal, | |
868 restore_backslashes=True) | |
869 return before, inlines, remaining, sysmessages | |
870 | |
871 def inline_internal_target(self, match, lineno): | |
872 before, inlines, remaining, sysmessages, endstring = self.inline_obj( | |
873 match, lineno, self.patterns.target, nodes.target) | |
874 if inlines and isinstance(inlines[0], nodes.target): | |
875 assert len(inlines) == 1 | |
876 target = inlines[0] | |
877 name = normalize_name(target.astext()) | |
878 target['names'].append(name) | |
879 self.document.note_explicit_target(target, self.parent) | |
880 return before, inlines, remaining, sysmessages | |
881 | |
882 def substitution_reference(self, match, lineno): | |
883 before, inlines, remaining, sysmessages, endstring = self.inline_obj( | |
884 match, lineno, self.patterns.substitution_ref, | |
885 nodes.substitution_reference) | |
886 if len(inlines) == 1: | |
887 subref_node = inlines[0] | |
888 if isinstance(subref_node, nodes.substitution_reference): | |
889 subref_text = subref_node.astext() | |
890 self.document.note_substitution_ref(subref_node, subref_text) | |
891 if endstring[-1:] == '_': | |
892 reference_node = nodes.reference( | |
893 '|%s%s' % (subref_text, endstring), '') | |
894 if endstring[-2:] == '__': | |
895 reference_node['anonymous'] = 1 | |
896 else: | |
897 reference_node['refname'] = normalize_name(subref_text) | |
898 self.document.note_refname(reference_node) | |
899 reference_node += subref_node | |
900 inlines = [reference_node] | |
901 return before, inlines, remaining, sysmessages | |
902 | |
903 def footnote_reference(self, match, lineno): | |
904 """ | |
905 Handles `nodes.footnote_reference` and `nodes.citation_reference` | |
906 elements. | |
907 """ | |
908 label = match.group('footnotelabel') | |
909 refname = normalize_name(label) | |
910 string = match.string | |
911 before = string[:match.start('whole')] | |
912 remaining = string[match.end('whole'):] | |
913 if match.group('citationlabel'): | |
914 refnode = nodes.citation_reference('[%s]_' % label, | |
915 refname=refname) | |
916 refnode += nodes.Text(label) | |
917 self.document.note_citation_ref(refnode) | |
918 else: | |
919 refnode = nodes.footnote_reference('[%s]_' % label) | |
920 if refname[0] == '#': | |
921 refname = refname[1:] | |
922 refnode['auto'] = 1 | |
923 self.document.note_autofootnote_ref(refnode) | |
924 elif refname == '*': | |
925 refname = '' | |
926 refnode['auto'] = '*' | |
927 self.document.note_symbol_footnote_ref( | |
928 refnode) | |
929 else: | |
930 refnode += nodes.Text(label) | |
931 if refname: | |
932 refnode['refname'] = refname | |
933 self.document.note_footnote_ref(refnode) | |
934 if utils.get_trim_footnote_ref_space(self.document.settings): | |
935 before = before.rstrip() | |
936 return (before, [refnode], remaining, []) | |
937 | |
938 def reference(self, match, lineno, anonymous=False): | |
939 referencename = match.group('refname') | |
940 refname = normalize_name(referencename) | |
941 referencenode = nodes.reference( | |
942 referencename + match.group('refend'), referencename, | |
943 name=whitespace_normalize_name(referencename)) | |
944 if anonymous: | |
945 referencenode['anonymous'] = 1 | |
946 else: | |
947 referencenode['refname'] = refname | |
948 self.document.note_refname(referencenode) | |
949 string = match.string | |
950 matchstart = match.start('whole') | |
951 matchend = match.end('whole') | |
952 return (string[:matchstart], [referencenode], string[matchend:], []) | |
953 | |
954 def anonymous_reference(self, match, lineno): | |
955 return self.reference(match, lineno, anonymous=1) | |
956 | |
957 def standalone_uri(self, match, lineno): | |
958 if (not match.group('scheme') | |
959 or match.group('scheme').lower() in urischemes.schemes): | |
960 if match.group('email'): | |
961 addscheme = 'mailto:' | |
962 else: | |
963 addscheme = '' | |
964 text = match.group('whole') | |
965 unescaped = unescape(text, 0) | |
966 return [nodes.reference(unescape(text, 1), unescaped, | |
967 refuri=addscheme + unescaped)] | |
968 else: # not a valid scheme | |
969 raise MarkupMismatch | |
970 | |
971 def pep_reference(self, match, lineno): | |
972 text = match.group(0) | |
973 if text.startswith('pep-'): | |
974 pepnum = int(match.group('pepnum1')) | |
975 elif text.startswith('PEP'): | |
976 pepnum = int(match.group('pepnum2')) | |
977 else: | |
978 raise MarkupMismatch | |
979 ref = (self.document.settings.pep_base_url | |
980 + self.document.settings.pep_file_url_template % pepnum) | |
981 unescaped = unescape(text, 0) | |
982 return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)] | |
983 | |
984 rfc_url = 'rfc%d.html' | |
985 | |
986 def rfc_reference(self, match, lineno): | |
987 text = match.group(0) | |
988 if text.startswith('RFC'): | |
989 rfcnum = int(match.group('rfcnum')) | |
990 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum | |
991 else: | |
992 raise MarkupMismatch | |
993 unescaped = unescape(text, 0) | |
994 return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)] | |
995 | |
996 def implicit_inline(self, text, lineno): | |
997 """ | |
998 Check each of the patterns in `self.implicit_dispatch` for a match, | |
999 and dispatch to the stored method for the pattern. Recursively check | |
1000 the text before and after the match. Return a list of `nodes.Text` | |
1001 and inline element nodes. | |
1002 """ | |
1003 if not text: | |
1004 return [] | |
1005 for pattern, method in self.implicit_dispatch: | |
1006 match = pattern.search(text) | |
1007 if match: | |
1008 try: | |
1009 # Must recurse on strings before *and* after the match; | |
1010 # there may be multiple patterns. | |
1011 return (self.implicit_inline(text[:match.start()], lineno) | |
1012 + method(match, lineno) + | |
1013 self.implicit_inline(text[match.end():], lineno)) | |
1014 except MarkupMismatch: | |
1015 pass | |
1016 return [nodes.Text(unescape(text), rawsource=unescape(text, 1))] | |
1017 | |
1018 dispatch = {'*': emphasis, | |
1019 '**': strong, | |
1020 '`': interpreted_or_phrase_ref, | |
1021 '``': literal, | |
1022 '_`': inline_internal_target, | |
1023 ']_': footnote_reference, | |
1024 '|': substitution_reference, | |
1025 '_': reference, | |
1026 '__': anonymous_reference} | |
1027 | |
1028 | |
1029 def _loweralpha_to_int(s, _zero=(ord('a')-1)): | |
1030 return ord(s) - _zero | |
1031 | |
1032 def _upperalpha_to_int(s, _zero=(ord('A')-1)): | |
1033 return ord(s) - _zero | |
1034 | |
1035 def _lowerroman_to_int(s): | |
1036 return roman.fromRoman(s.upper()) | |
1037 | |
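# Illustrative values for the three converters above:
#
#     >>> _loweralpha_to_int('c'), _upperalpha_to_int('C'), _lowerroman_to_int('iv')
#     (3, 3, 4)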
1038 | |
1039 class Body(RSTState): | |
1040 | |
1041 """ | |
1042 Generic classifier of the first line of a block. | |
1043 """ | |
1044 | |
1045 double_width_pad_char = tableparser.TableParser.double_width_pad_char | |
1046 """Padding character for East Asian double-width text.""" | |
1047 | |
1048 enum = Struct() | |
1049 """Enumerated list parsing information.""" | |
1050 | |
1051 enum.formatinfo = { | |
1052 'parens': Struct(prefix='(', suffix=')', start=1, end=-1), | |
1053 'rparen': Struct(prefix='', suffix=')', start=0, end=-1), | |
1054 'period': Struct(prefix='', suffix='.', start=0, end=-1)} | |
1055 enum.formats = enum.formatinfo.keys() | |
1056 enum.sequences = ['arabic', 'loweralpha', 'upperalpha', | |
1057 'lowerroman', 'upperroman'] # ORDERED! | |
1058 enum.sequencepats = {'arabic': '[0-9]+', | |
1059 'loweralpha': '[a-z]', | |
1060 'upperalpha': '[A-Z]', | |
1061 'lowerroman': '[ivxlcdm]+', | |
1062 'upperroman': '[IVXLCDM]+',} | |
1063 enum.converters = {'arabic': int, | |
1064 'loweralpha': _loweralpha_to_int, | |
1065 'upperalpha': _upperalpha_to_int, | |
1066 'lowerroman': _lowerroman_to_int, | |
1067 'upperroman': roman.fromRoman} | |
1068 | |
1069 enum.sequenceregexps = {} | |
1070 for sequence in enum.sequences: | |
1071 enum.sequenceregexps[sequence] = re.compile( | |
1072 enum.sequencepats[sequence] + '$', re.UNICODE) | |
1073 | |
1074 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$') | |
1075 """Matches the top (& bottom) of a full table).""" | |
1076 | |
1077 simple_table_top_pat = re.compile('=+( +=+)+ *$') | |
1078 """Matches the top of a simple table.""" | |
1079 | |
1080 simple_table_border_pat = re.compile('=+[ =]*$') | |
1081 """Matches the bottom & header bottom of a simple table.""" | |
1082 | |
1083 pats = {} | |
1084 """Fragments of patterns used by transitions.""" | |
1085 | |
1086 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]' | |
1087 pats['alpha'] = '[a-zA-Z]' | |
1088 pats['alphanum'] = '[a-zA-Z0-9]' | |
1089 pats['alphanumplus'] = '[a-zA-Z0-9_-]' | |
1090 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s' | |
1091 '|%(upperroman)s|#)' % enum.sequencepats) | |
1092 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats | |
1093 # @@@ Loosen up the pattern? Allow Unicode? | |
1094 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats | |
1095 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats | |
1096 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats | |
1097 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats | |
1098 | |
1099 for format in enum.formats: | |
1100 pats[format] = '(?P<%s>%s%s%s)' % ( | |
1101 format, re.escape(enum.formatinfo[format].prefix), | |
1102 pats['enum'], re.escape(enum.formatinfo[format].suffix)) | |
1103 | |
1104 patterns = { | |
1105 'bullet': u'[-+*\u2022\u2023\u2043]( +|$)', | |
1106 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats, | |
1107 'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)', | |
1108 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats, | |
1109 'doctest': r'>>>( +|$)', | |
1110 'line_block': r'\|( +|$)', | |
1111 'grid_table_top': grid_table_top_pat, | |
1112 'simple_table_top': simple_table_top_pat, | |
1113 'explicit_markup': r'\.\.( +|$)', | |
1114 'anonymous': r'__( +|$)', | |
1115 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats, | |
1116 'text': r''} | |
1117 initial_transitions = ( | |
1118 'bullet', | |
1119 'enumerator', | |
1120 'field_marker', | |
1121 'option_marker', | |
1122 'doctest', | |
1123 'line_block', | |
1124 'grid_table_top', | |
1125 'simple_table_top', | |
1126 'explicit_markup', | |
1127 'anonymous', | |
1128 'line', | |
1129 'text') | |
1130 | |
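# A few illustrative first lines and the transition each one matches:
#
#     '* item'        -> 'bullet'
#     '3. item'       -> 'enumerator'
#     ':field: value' -> 'field_marker'
#     '.. note::'     -> 'explicit_markup'
#     '-----'         -> 'line'
#     'plain words'   -> 'text' (the catch-all)
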
1131 def indent(self, match, context, next_state): | |
1132 """Block quote.""" | |
1133 indented, indent, line_offset, blank_finish = \ | |
1134 self.state_machine.get_indented() | |
1135 elements = self.block_quote(indented, line_offset) | |
1136 self.parent += elements | |
1137 if not blank_finish: | |
1138 self.parent += self.unindent_warning('Block quote') | |
1139 return context, next_state, [] | |
1140 | |
1141 def block_quote(self, indented, line_offset): | |
1142 elements = [] | |
1143 while indented: | |
1144 (blockquote_lines, | |
1145 attribution_lines, | |
1146 attribution_offset, | |
1147 indented, | |
1148 new_line_offset) = self.split_attribution(indented, line_offset) | |
1149 blockquote = nodes.block_quote() | |
1150 self.nested_parse(blockquote_lines, line_offset, blockquote) | |
1151 elements.append(blockquote) | |
1152 if attribution_lines: | |
1153 attribution, messages = self.parse_attribution( | |
1154 attribution_lines, attribution_offset) | |
1155 blockquote += attribution | |
1156 elements += messages | |
1157 line_offset = new_line_offset | |
1158 while indented and not indented[0]: | |
1159 indented = indented[1:] | |
1160 line_offset += 1 | |
1161 return elements | |
1162 | |
1163 # U+2014 is an em-dash: | |
1164 attribution_pattern = re.compile(u'(---?(?!-)|\u2014) *(?=[^ \\n])', | |
1165 re.UNICODE) | |
1166 | |
1167 def split_attribution(self, indented, line_offset): | |
1168 """ | |
1169 Check for a block quote attribution and split it off: | |
1170 | |
1171 * First line after a blank line must begin with a dash ("--", "---", | |
1172 em-dash; matches `self.attribution_pattern`). | |
1173 * Every line after that must have consistent indentation. | |
1174 * Attributions must be preceded by block quote content. | |
1175 | |
1176 Return a tuple of: (block quote content lines, attribution lines, | |
1177 attribution offset, remaining indented lines, new line offset). | |
1178 """ | |
1179 blank = None | |
1180 nonblank_seen = False | |
1181 for i in range(len(indented)): | |
1182 line = indented[i].rstrip() | |
1183 if line: | |
1184 if nonblank_seen and blank == i - 1: # last line blank | |
1185 match = self.attribution_pattern.match(line) | |
1186 if match: | |
1187 attribution_end, indent = self.check_attribution( | |
1188 indented, i) | |
1189 if attribution_end: | |
1190 a_lines = indented[i:attribution_end] | |
1191 a_lines.trim_left(match.end(), end=1) | |
1192 a_lines.trim_left(indent, start=1) | |
1193 return (indented[:i], a_lines, | |
1194 i, indented[attribution_end:], | |
1195 line_offset + attribution_end) | |
1196 nonblank_seen = True | |
1197 else: | |
1198 blank = i | |
1199 else: | |
1200 return (indented, None, None, None, None) | |
1201 | |
1202 def check_attribution(self, indented, attribution_start): | |
1203 """ | |
1204 Check attribution shape. | |
1205 Return the index past the end of the attribution, and the indent. | |
1206 """ | |
1207 indent = None | |
1208 i = attribution_start + 1 | |
1209 for i in range(attribution_start + 1, len(indented)): | |
1210 line = indented[i].rstrip() | |
1211 if not line: | |
1212 break | |
1213 if indent is None: | |
1214 indent = len(line) - len(line.lstrip()) | |
1215 elif len(line) - len(line.lstrip()) != indent: | |
1216 return None, None # bad shape; not an attribution | |
1217 else: | |
1218 # return index of line after last attribution line: | |
1219 i += 1 | |
1220 return i, (indent or 0) | |
1221 | |
1222 def parse_attribution(self, indented, line_offset): | |
1223 text = '\n'.join(indented).rstrip() | |
1224 lineno = self.state_machine.abs_line_number() + line_offset | |
1225 textnodes, messages = self.inline_text(text, lineno) | |
1226 node = nodes.attribution(text, '', *textnodes) | |
1227 node.source, node.line = self.state_machine.get_source_and_line(lineno) | |
1228 return node, messages | |
1229 | |
1230 def bullet(self, match, context, next_state): | |
1231 """Bullet list item.""" | |
1232 bulletlist = nodes.bullet_list() | |
1233 self.parent += bulletlist | |
1234 bulletlist['bullet'] = match.string[0] | |
1235 i, blank_finish = self.list_item(match.end()) | |
1236 bulletlist += i | |
1237 offset = self.state_machine.line_offset + 1 # next line | |
1238 new_line_offset, blank_finish = self.nested_list_parse( | |
1239 self.state_machine.input_lines[offset:], | |
1240 input_offset=self.state_machine.abs_line_offset() + 1, | |
1241 node=bulletlist, initial_state='BulletList', | |
1242 blank_finish=blank_finish) | |
1243 self.goto_line(new_line_offset) | |
1244 if not blank_finish: | |
1245 self.parent += self.unindent_warning('Bullet list') | |
1246 return [], next_state, [] | |
1247 | |
1248 def list_item(self, indent): | |
1249 if self.state_machine.line[indent:]: | |
1250 indented, line_offset, blank_finish = ( | |
1251 self.state_machine.get_known_indented(indent)) | |
1252 else: | |
1253 indented, indent, line_offset, blank_finish = ( | |
1254 self.state_machine.get_first_known_indented(indent)) | |
1255 listitem = nodes.list_item('\n'.join(indented)) | |
1256 if indented: | |
1257 self.nested_parse(indented, input_offset=line_offset, | |
1258 node=listitem) | |
1259 return listitem, blank_finish | |
1260 | |
1261 def enumerator(self, match, context, next_state): | |
1262 """Enumerated List Item""" | |
1263 format, sequence, text, ordinal = self.parse_enumerator(match) | |
1264 if not self.is_enumerated_list_item(ordinal, sequence, format): | |
1265 raise statemachine.TransitionCorrection('text') | |
1266 enumlist = nodes.enumerated_list() | |
1267 self.parent += enumlist | |
1268 if sequence == '#': | |
1269 enumlist['enumtype'] = 'arabic' | |
1270 else: | |
1271 enumlist['enumtype'] = sequence | |
1272 enumlist['prefix'] = self.enum.formatinfo[format].prefix | |
1273 enumlist['suffix'] = self.enum.formatinfo[format].suffix | |
1274 if ordinal != 1: | |
1275 enumlist['start'] = ordinal | |
1276 msg = self.reporter.info( | |
1277 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)' | |
1278 % (text, ordinal)) | |
1279 self.parent += msg | |
1280 listitem, blank_finish = self.list_item(match.end()) | |
1281 enumlist += listitem | |
1282 offset = self.state_machine.line_offset + 1 # next line | |
1283 newline_offset, blank_finish = self.nested_list_parse( | |
1284 self.state_machine.input_lines[offset:], | |
1285 input_offset=self.state_machine.abs_line_offset() + 1, | |
1286 node=enumlist, initial_state='EnumeratedList', | |
1287 blank_finish=blank_finish, | |
1288 extra_settings={'lastordinal': ordinal, | |
1289 'format': format, | |
1290 'auto': sequence == '#'}) | |
1291 self.goto_line(newline_offset) | |
1292 if not blank_finish: | |
1293 self.parent += self.unindent_warning('Enumerated list') | |
1294 return [], next_state, [] | |
1295 | |
1296 def parse_enumerator(self, match, expected_sequence=None): | |
1297 """ | |
1298 Analyze an enumerator and return the results. | |
1299 | |
1300 :Return: | |
1301 - the enumerator format ('period', 'parens', or 'rparen'), | |
1302 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.), | |
1303 - the text of the enumerator, stripped of formatting, and | |
1304 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.; | |
1305 ``None`` is returned for invalid enumerator text). | |
1306 | |
1307 The enumerator format has already been determined by the regular | |
1308 expression match. If `expected_sequence` is given, that sequence is | |
1309 tried first. If not, we check for Roman numeral 1. This way, | |
1310 single-character Roman numerals (which are also alphabetical) can be | |
1311 matched. If no sequence has been matched, all sequences are checked in | |
1312 order. | |
1313 """ | |
1314 groupdict = match.groupdict() | |
1315 sequence = '' | |
1316 for format in self.enum.formats: | |
1317 if groupdict[format]: # was this the format matched? | |
1318 break # yes; keep `format` | |
1319 else: # shouldn't happen | |
1320 raise ParserError('enumerator format not matched') | |
1321 text = groupdict[format][self.enum.formatinfo[format].start | |
1322 :self.enum.formatinfo[format].end] | |
1323 if text == '#': | |
1324 sequence = '#' | |
1325 elif expected_sequence: | |
1326 try: | |
1327 if self.enum.sequenceregexps[expected_sequence].match(text): | |
1328 sequence = expected_sequence | |
1329 except KeyError: # shouldn't happen | |
1330 raise ParserError('unknown enumerator sequence: %s' | |
1331 % sequence) | |
1332 elif text == 'i': | |
1333 sequence = 'lowerroman' | |
1334 elif text == 'I': | |
1335 sequence = 'upperroman' | |
1336 if not sequence: | |
1337 for sequence in self.enum.sequences: | |
1338 if self.enum.sequenceregexps[sequence].match(text): | |
1339 break | |
1340 else: # shouldn't happen | |
1341 raise ParserError('enumerator sequence not matched') | |
1342 if sequence == '#': | |
1343 ordinal = 1 | |
1344 else: | |
1345 try: | |
1346 ordinal = self.enum.converters[sequence](text) | |
1347 except roman.InvalidRomanNumeralError: | |
1348 ordinal = None | |
1349 return format, sequence, text, ordinal | |
1350 | |
1351 def is_enumerated_list_item(self, ordinal, sequence, format): | |
1352 """ | |
1353 Check validity based on the ordinal value and the second line. | |
1354 | |
1355 Return true if the ordinal is valid and the second line is blank, | |
1356 indented, or starts with the next enumerator or an auto-enumerator. | |
1357 """ | |
1358 if ordinal is None: | |
1359 return None | |
1360 try: | |
1361 next_line = self.state_machine.next_line() | |
1362 except EOFError: # end of input lines | |
1363 self.state_machine.previous_line() | |
1364 return 1 | |
1365 else: | |
1366 self.state_machine.previous_line() | |
1367 if not next_line[:1].strip(): # blank or indented | |
1368 return 1 | |
1369 result = self.make_enumerator(ordinal + 1, sequence, format) | |
1370 if result: | |
1371 next_enumerator, auto_enumerator = result | |
1372 try: | |
1373 if ( next_line.startswith(next_enumerator) or | |
1374 next_line.startswith(auto_enumerator) ): | |
1375 return 1 | |
1376 except TypeError: | |
1377 pass | |
1378 return None | |
1379 | |
1380 def make_enumerator(self, ordinal, sequence, format): | |
1381 """ | |
1382 Construct and return the next enumerated list item marker, and an | |
1383 auto-enumerator ("#" instead of the regular enumerator). | |
1384 | |
1385 Return ``None`` for invalid (out of range) ordinals. | |
1386 """ #" | |
1387 if sequence == '#': | |
1388 enumerator = '#' | |
1389 elif sequence == 'arabic': | |
1390 enumerator = str(ordinal) | |
1391 else: | |
1392 if sequence.endswith('alpha'): | |
1393 if ordinal > 26: | |
1394 return None | |
1395 enumerator = chr(ordinal + ord('a') - 1) | |
1396 elif sequence.endswith('roman'): | |
1397 try: | |
1398 enumerator = roman.toRoman(ordinal) | |
1399 except roman.RomanError: | |
1400 return None | |
1401 else: # shouldn't happen | |
1402 raise ParserError('unknown enumerator sequence: "%s"' | |
1403 % sequence) | |
1404 if sequence.startswith('lower'): | |
1405 enumerator = enumerator.lower() | |
1406 elif sequence.startswith('upper'): | |
1407 enumerator = enumerator.upper() | |
1408 else: # shouldn't happen | |
1409 raise ParserError('unknown enumerator sequence: "%s"' | |
1410 % sequence) | |
1411 formatinfo = self.enum.formatinfo[format] | |
1412 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix | |
1413 + ' ') | |
1414 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' ' | |
1415 return next_enumerator, auto_enumerator | |
1416 | |
1417 def field_marker(self, match, context, next_state): | |
1418 """Field list item.""" | |
1419 field_list = nodes.field_list() | |
1420 self.parent += field_list | |
1421 field, blank_finish = self.field(match) | |
1422 field_list += field | |
1423 offset = self.state_machine.line_offset + 1 # next line | |
1424 newline_offset, blank_finish = self.nested_list_parse( | |
1425 self.state_machine.input_lines[offset:], | |
1426 input_offset=self.state_machine.abs_line_offset() + 1, | |
1427 node=field_list, initial_state='FieldList', | |
1428 blank_finish=blank_finish) | |
1429 self.goto_line(newline_offset) | |
1430 if not blank_finish: | |
1431 self.parent += self.unindent_warning('Field list') | |
1432 return [], next_state, [] | |
1433 | |
1434 def field(self, match): | |
1435 name = self.parse_field_marker(match) | |
1436 src, srcline = self.state_machine.get_source_and_line() | |
1437 lineno = self.state_machine.abs_line_number() | |
1438 indented, indent, line_offset, blank_finish = \ | |
1439 self.state_machine.get_first_known_indented(match.end()) | |
1440 field_node = nodes.field() | |
1441 field_node.source = src | |
1442 field_node.line = srcline | |
1443 name_nodes, name_messages = self.inline_text(name, lineno) | |
1444 field_node += nodes.field_name(name, '', *name_nodes) | |
1445 field_body = nodes.field_body('\n'.join(indented), *name_messages) | |
1446 field_node += field_body | |
1447 if indented: | |
1448 self.parse_field_body(indented, line_offset, field_body) | |
1449 return field_node, blank_finish | |
1450 | |
1451 def parse_field_marker(self, match): | |
1452 """Extract & return field name from a field marker match.""" | |
1453 field = match.group()[1:] # strip off leading ':' | |
1454 field = field[:field.rfind(':')] # strip off trailing ':' etc. | |
1455 return field | |
1456 | |
1457 def parse_field_body(self, indented, offset, node): | |
1458 self.nested_parse(indented, input_offset=offset, node=node) | |
1459 | |
1460 def option_marker(self, match, context, next_state): | |
1461 """Option list item.""" | |
1462 optionlist = nodes.option_list() | |
1463 try: | |
1464 listitem, blank_finish = self.option_list_item(match) | |
1465 except MarkupError, error: | |
1466 # This shouldn't happen; pattern won't match. | |
1467 msg = self.reporter.error(u'Invalid option list marker: %s' % | |
1468 error) | |
1469 self.parent += msg | |
1470 indented, indent, line_offset, blank_finish = \ | |
1471 self.state_machine.get_first_known_indented(match.end()) | |
1472 elements = self.block_quote(indented, line_offset) | |
1473 self.parent += elements | |
1474 if not blank_finish: | |
1475 self.parent += self.unindent_warning('Option list') | |
1476 return [], next_state, [] | |
1477 self.parent += optionlist | |
1478 optionlist += listitem | |
1479 offset = self.state_machine.line_offset + 1 # next line | |
1480 newline_offset, blank_finish = self.nested_list_parse( | |
1481 self.state_machine.input_lines[offset:], | |
1482 input_offset=self.state_machine.abs_line_offset() + 1, | |
1483 node=optionlist, initial_state='OptionList', | |
1484 blank_finish=blank_finish) | |
1485 self.goto_line(newline_offset) | |
1486 if not blank_finish: | |
1487 self.parent += self.unindent_warning('Option list') | |
1488 return [], next_state, [] | |
1489 | |
1490 def option_list_item(self, match): | |
1491 offset = self.state_machine.abs_line_offset() | |
1492 options = self.parse_option_marker(match) | |
1493 indented, indent, line_offset, blank_finish = \ | |
1494 self.state_machine.get_first_known_indented(match.end()) | |
1495 if not indented: # not an option list item | |
1496 self.goto_line(offset) | |
1497 raise statemachine.TransitionCorrection('text') | |
1498 option_group = nodes.option_group('', *options) | |
1499 description = nodes.description('\n'.join(indented)) | |
1500 option_list_item = nodes.option_list_item('', option_group, | |
1501 description) | |
1502 if indented: | |
1503 self.nested_parse(indented, input_offset=line_offset, | |
1504 node=description) | |
1505 return option_list_item, blank_finish | |
1506 | |
1507 def parse_option_marker(self, match): | |
1508 """ | |
1509 Return a list of `nodes.option` and `nodes.option_argument` objects, | |
1510 parsed from an option marker match. | |
1511 | |
1512 :Exception: `MarkupError` for invalid option markers. | |
1513 """ | |
1514 optlist = [] | |
1515 optionstrings = match.group().rstrip().split(', ') | |
1516 for optionstring in optionstrings: | |
1517 tokens = optionstring.split() | |
1518 delimiter = ' ' | |
1519 firstopt = tokens[0].split('=', 1) | |
1520 if len(firstopt) > 1: | |
1521 # "--opt=value" form | |
1522 tokens[:1] = firstopt | |
1523 delimiter = '=' | |
1524 elif (len(tokens[0]) > 2 | |
1525 and ((tokens[0].startswith('-') | |
1526 and not tokens[0].startswith('--')) | |
1527 or tokens[0].startswith('+'))): | |
1528 # "-ovalue" form | |
1529 tokens[:1] = [tokens[0][:2], tokens[0][2:]] | |
1530 delimiter = '' | |
1531 if len(tokens) > 1 and (tokens[1].startswith('<') | |
1532 and tokens[-1].endswith('>')): | |
1533 # "-o <value1 value2>" form; join all values into one token | |
1534 tokens[1:] = [' '.join(tokens[1:])] | |
1535 if 0 < len(tokens) <= 2: | |
1536 option = nodes.option(optionstring) | |
1537 option += nodes.option_string(tokens[0], tokens[0]) | |
1538 if len(tokens) > 1: | |
1539 option += nodes.option_argument(tokens[1], tokens[1], | |
1540 delimiter=delimiter) | |
1541 optlist.append(option) | |
1542 else: | |
1543 raise MarkupError( | |
1544 'wrong number of option tokens (=%s), should be 1 or 2: ' | |
1545 '"%s"' % (len(tokens), optionstring)) | |
1546 return optlist | |
1547 | |
1548 def doctest(self, match, context, next_state): | |
1549 data = '\n'.join(self.state_machine.get_text_block()) | |
1550 self.parent += nodes.doctest_block(data, data) | |
1551 return [], next_state, [] | |
1552 | |
1553 def line_block(self, match, context, next_state): | |
1554 """First line of a line block.""" | |
1555 block = nodes.line_block() | |
1556 self.parent += block | |
1557 lineno = self.state_machine.abs_line_number() | |
1558 line, messages, blank_finish = self.line_block_line(match, lineno) | |
1559 block += line | |
1560 self.parent += messages | |
1561 if not blank_finish: | |
1562 offset = self.state_machine.line_offset + 1 # next line | |
1563 new_line_offset, blank_finish = self.nested_list_parse( | |
1564 self.state_machine.input_lines[offset:], | |
1565 input_offset=self.state_machine.abs_line_offset() + 1, | |
1566 node=block, initial_state='LineBlock', | |
1567 blank_finish=0) | |
1568 self.goto_line(new_line_offset) | |
1569 if not blank_finish: | |
1570 self.parent += self.reporter.warning( | |
1571 'Line block ends without a blank line.', | |
1572 line=lineno+1) | |
1573 if len(block): | |
1574 if block[0].indent is None: | |
1575 block[0].indent = 0 | |
1576 self.nest_line_block_lines(block) | |
1577 return [], next_state, [] | |
1578 | |
1579 def line_block_line(self, match, lineno): | |
1580 """Return one line element of a line_block.""" | |
1581 indented, indent, line_offset, blank_finish = \ | |
1582 self.state_machine.get_first_known_indented(match.end(), | |
1583 until_blank=True) | |
1584 text = u'\n'.join(indented) | |
1585 text_nodes, messages = self.inline_text(text, lineno) | |
1586 line = nodes.line(text, '', *text_nodes) | |
1587 if match.string.rstrip() != '|': # not empty | |
1588 line.indent = len(match.group(1)) - 1 | |
1589 return line, messages, blank_finish | |
1590 | |
1591 def nest_line_block_lines(self, block): | |
1592 for index in range(1, len(block)): | |
1593 if getattr(block[index], 'indent', None) is None: | |
1594 block[index].indent = block[index - 1].indent | |
1595 self.nest_line_block_segment(block) | |
1596 | |
1597 def nest_line_block_segment(self, block): | |
1598 indents = [item.indent for item in block] | |
1599 least = min(indents) | |
1600 new_items = [] | |
1601 new_block = nodes.line_block() | |
1602 for item in block: | |
1603 if item.indent > least: | |
1604 new_block.append(item) | |
1605 else: | |
1606 if len(new_block): | |
1607 self.nest_line_block_segment(new_block) | |
1608 new_items.append(new_block) | |
1609 new_block = nodes.line_block() | |
1610 new_items.append(item) | |
1611 if len(new_block): | |
1612 self.nest_line_block_segment(new_block) | |
1613 new_items.append(new_block) | |
1614 block[:] = new_items | |
1615 | |
1616 def grid_table_top(self, match, context, next_state): | |
1617 """Top border of a full table.""" | |
1618 return self.table_top(match, context, next_state, | |
1619 self.isolate_grid_table, | |
1620 tableparser.GridTableParser) | |
1621 | |
1622 def simple_table_top(self, match, context, next_state): | |
1623 """Top border of a simple table.""" | |
1624 return self.table_top(match, context, next_state, | |
1625 self.isolate_simple_table, | |
1626 tableparser.SimpleTableParser) | |
1627 | |
1628 def table_top(self, match, context, next_state, | |
1629 isolate_function, parser_class): | |
1630 """Top border of a generic table.""" | |
1631 nodelist, blank_finish = self.table(isolate_function, parser_class) | |
1632 self.parent += nodelist | |
1633 if not blank_finish: | |
1634 msg = self.reporter.warning( | |
1635 'Blank line required after table.', | |
1636 line=self.state_machine.abs_line_number()+1) | |
1637 self.parent += msg | |
1638 return [], next_state, [] | |
1639 | |
1640 def table(self, isolate_function, parser_class): | |
1641 """Parse a table.""" | |
1642 block, messages, blank_finish = isolate_function() | |
1643 if block: | |
1644 try: | |
1645 parser = parser_class() | |
1646 tabledata = parser.parse(block) | |
1647 tableline = (self.state_machine.abs_line_number() - len(block) | |
1648 + 1) | |
1649 table = self.build_table(tabledata, tableline) | |
1650 nodelist = [table] + messages | |
1651 except tableparser.TableMarkupError, err: | |
1652 nodelist = self.malformed_table(block, ' '.join(err.args), | |
1653 offset=err.offset) + messages | |
1654 else: | |
1655 nodelist = messages | |
1656 return nodelist, blank_finish | |
1657 | |
1658 def isolate_grid_table(self): | |
1659 messages = [] | |
1660 blank_finish = 1 | |
1661 try: | |
1662 block = self.state_machine.get_text_block(flush_left=True) | |
1663 except statemachine.UnexpectedIndentationError, err: | |
1664 block, src, srcline = err.args | |
1665 messages.append(self.reporter.error('Unexpected indentation.', | |
1666 source=src, line=srcline)) | |
1667 blank_finish = 0 | |
1668 block.disconnect() | |
1669 # for East Asian chars: | |
1670 block.pad_double_width(self.double_width_pad_char) | |
1671 width = len(block[0].strip()) | |
1672 for i in range(len(block)): | |
1673 block[i] = block[i].strip() | |
1674 if block[i][0] not in '+|': # check left edge | |
1675 blank_finish = 0 | |
1676 self.state_machine.previous_line(len(block) - i) | |
1677 del block[i:] | |
1678 break | |
1679 if not self.grid_table_top_pat.match(block[-1]): # find bottom | |
1680 blank_finish = 0 | |
1681 # from second-last to third line of table: | |
1682 for i in range(len(block) - 2, 1, -1): | |
1683 if self.grid_table_top_pat.match(block[i]): | |
1684 self.state_machine.previous_line(len(block) - i + 1) | |
1685 del block[i+1:] | |
1686 break | |
1687 else: | |
1688 messages.extend(self.malformed_table(block)) | |
1689 return [], messages, blank_finish | |
1690 for i in range(len(block)): # check right edge | |
1691 if len(block[i]) != width or block[i][-1] not in '+|': | |
1692 messages.extend(self.malformed_table(block)) | |
1693 return [], messages, blank_finish | |
1694 return block, messages, blank_finish | |
1695 | |
1696 def isolate_simple_table(self): | |
1697 start = self.state_machine.line_offset | |
1698 lines = self.state_machine.input_lines | |
1699 limit = len(lines) - 1 | |
1700 toplen = len(lines[start].strip()) | |
1701 pattern_match = self.simple_table_border_pat.match | |
1702 found = 0 | |
1703 found_at = None | |
1704 i = start + 1 | |
1705 while i <= limit: | |
1706 line = lines[i] | |
1707 match = pattern_match(line) | |
1708 if match: | |
1709 if len(line.strip()) != toplen: | |
1710 self.state_machine.next_line(i - start) | |
1711 messages = self.malformed_table( | |
1712 lines[start:i+1], 'Bottom/header table border does ' | |
1713 'not match top border.') | |
1714 return [], messages, i == limit or not lines[i+1].strip() | |
1715 found += 1 | |
1716 found_at = i | |
1717 if found == 2 or i == limit or not lines[i+1].strip(): | |
1718 end = i | |
1719 break | |
1720 i += 1 | |
1721 else: # reached end of input_lines | |
1722 if found: | |
1723 extra = ' or no blank line after table bottom' | |
1724 self.state_machine.next_line(found_at - start) | |
1725 block = lines[start:found_at+1] | |
1726 else: | |
1727 extra = '' | |
1728 self.state_machine.next_line(i - start - 1) | |
1729 block = lines[start:] | |
1730 messages = self.malformed_table( | |
1731 block, 'No bottom table border found%s.' % extra) | |
1732 return [], messages, not extra | |
1733 self.state_machine.next_line(end - start) | |
1734 block = lines[start:end+1] | |
1735 # for East Asian chars: | |
1736 block.pad_double_width(self.double_width_pad_char) | |
1737 return block, [], end == limit or not lines[end+1].strip() | |
1738 | |
1739 def malformed_table(self, block, detail='', offset=0): | |
1740 block.replace(self.double_width_pad_char, '') | |
1741 data = '\n'.join(block) | |
1742 message = 'Malformed table.' | |
1743 startline = self.state_machine.abs_line_number() - len(block) + 1 | |
1744 if detail: | |
1745 message += '\n' + detail | |
1746 error = self.reporter.error(message, nodes.literal_block(data, data), | |
1747 line=startline+offset) | |
1748 return [error] | |
1749 | |
1750 def build_table(self, tabledata, tableline, stub_columns=0): | |
1751 colwidths, headrows, bodyrows = tabledata | |
1752 table = nodes.table() | |
1753 tgroup = nodes.tgroup(cols=len(colwidths)) | |
1754 table += tgroup | |
1755 for colwidth in colwidths: | |
1756 colspec = nodes.colspec(colwidth=colwidth) | |
1757 if stub_columns: | |
1758 colspec.attributes['stub'] = 1 | |
1759 stub_columns -= 1 | |
1760 tgroup += colspec | |
1761 if headrows: | |
1762 thead = nodes.thead() | |
1763 tgroup += thead | |
1764 for row in headrows: | |
1765 thead += self.build_table_row(row, tableline) | |
1766 tbody = nodes.tbody() | |
1767 tgroup += tbody | |
1768 for row in bodyrows: | |
1769 tbody += self.build_table_row(row, tableline) | |
1770 return table | |
1771 | |
1772 def build_table_row(self, rowdata, tableline): | |
1773 row = nodes.row() | |
1774 for cell in rowdata: | |
1775 if cell is None: | |
1776 continue | |
1777 morerows, morecols, offset, cellblock = cell | |
1778 attributes = {} | |
1779 if morerows: | |
1780 attributes['morerows'] = morerows | |
1781 if morecols: | |
1782 attributes['morecols'] = morecols | |
1783 entry = nodes.entry(**attributes) | |
1784 row += entry | |
1785 if ''.join(cellblock): | |
1786 self.nested_parse(cellblock, input_offset=tableline+offset, | |
1787 node=entry) | |
1788 return row | |
1789 | |
1790 | |
1791 explicit = Struct() | |
1792 """Patterns and constants used for explicit markup recognition.""" | |
1793 | |
1794 explicit.patterns = Struct( | |
1795 target=re.compile(r""" | |
1796 ( | |
1797 _ # anonymous target | |
1798 | # *OR* | |
1799 (?!_) # no underscore at the beginning | |
1800 (?P<quote>`?) # optional open quote | |
1801 (?![ `]) # first char. not space or | |
1802 # backquote | |
1803 (?P<name> # reference name | |
1804 .+? | |
1805 ) | |
1806 %(non_whitespace_escape_before)s | |
1807 (?P=quote) # close quote if open quote used | |
1808 ) | |
1809 (?<!(?<!\x00):) # no unescaped colon at end | |
1810 %(non_whitespace_escape_before)s | |
1811 [ ]? # optional space | |
1812 : # end of reference name | |
1813 ([ ]+|$) # followed by whitespace | |
1814 """ % vars(Inliner), re.VERBOSE | re.UNICODE), | |
1815 reference=re.compile(r""" | |
1816 ( | |
1817 (?P<simple>%(simplename)s)_ | |
1818 | # *OR* | |
1819 ` # open backquote | |
1820 (?![ ]) # not space | |
1821 (?P<phrase>.+?) # hyperlink phrase | |
1822 %(non_whitespace_escape_before)s | |
1823 `_ # close backquote, | |
1824 # reference mark | |
1825 ) | |
1826 $ # end of string | |
1827 """ % vars(Inliner), re.VERBOSE | re.UNICODE), | |
1828 substitution=re.compile(r""" | |
1829 ( | |
1830 (?![ ]) # first char. not space | |
1831 (?P<name>.+?) # substitution text | |
1832 %(non_whitespace_escape_before)s | |
1833 \| # close delimiter | |
1834 ) | |
1835 ([ ]+|$) # followed by whitespace | |
1836 """ % vars(Inliner), | |
1837 re.VERBOSE | re.UNICODE),) | |
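# Rough examples of the text these patterns are meant to recognize, once
# the callers below have stripped the ".. _" / ".. |" prefixes:
#   target:       "name: http://example.org/"  or  "`phrase name`:"
#   reference:    "name_"  or  "`phrase reference`_"
#   substitution: "name| replacement text"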
1838 | |
1839 def footnote(self, match): | |
1840 src, srcline = self.state_machine.get_source_and_line() | |
1841 indented, indent, offset, blank_finish = \ | |
1842 self.state_machine.get_first_known_indented(match.end()) | |
1843 label = match.group(1) | |
1844 name = normalize_name(label) | |
1845 footnote = nodes.footnote('\n'.join(indented)) | |
1846 footnote.source = src | |
1847 footnote.line = srcline | |
1848 if name[0] == '#': # auto-numbered | |
1849 name = name[1:] # autonumber label | |
1850 footnote['auto'] = 1 | |
1851 if name: | |
1852 footnote['names'].append(name) | |
1853 self.document.note_autofootnote(footnote) | |
1854 elif name == '*': # auto-symbol | |
1855 name = '' | |
1856 footnote['auto'] = '*' | |
1857 self.document.note_symbol_footnote(footnote) | |
1858 else: # manually numbered | |
1859 footnote += nodes.label('', label) | |
1860 footnote['names'].append(name) | |
1861 self.document.note_footnote(footnote) | |
1862 if name: | |
1863 self.document.note_explicit_target(footnote, footnote) | |
1864 else: | |
1865 self.document.set_id(footnote, footnote) | |
1866 if indented: | |
1867 self.nested_parse(indented, input_offset=offset, node=footnote) | |
1868 return [footnote], blank_finish | |
1869 | |
1870 def citation(self, match): | |
1871 src, srcline = self.state_machine.get_source_and_line() | |
1872 indented, indent, offset, blank_finish = \ | |
1873 self.state_machine.get_first_known_indented(match.end()) | |
1874 label = match.group(1) | |
1875 name = normalize_name(label) | |
1876 citation = nodes.citation('\n'.join(indented)) | |
1877 citation.source = src | |
1878 citation.line = srcline | |
1879 citation += nodes.label('', label) | |
1880 citation['names'].append(name) | |
1881 self.document.note_citation(citation) | |
1882 self.document.note_explicit_target(citation, citation) | |
1883 if indented: | |
1884 self.nested_parse(indented, input_offset=offset, node=citation) | |
1885 return [citation], blank_finish | |
1886 | |
1887 def hyperlink_target(self, match): | |
1888 pattern = self.explicit.patterns.target | |
1889 lineno = self.state_machine.abs_line_number() | |
1890 block, indent, offset, blank_finish = \ | |
1891 self.state_machine.get_first_known_indented( | |
1892 match.end(), until_blank=True, strip_indent=False) | |
1893 blocktext = match.string[:match.end()] + '\n'.join(block) | |
1894 block = [escape2null(line) for line in block] | |
1895 escaped = block[0] | |
1896 blockindex = 0 | |
1897 while True: | |
1898 targetmatch = pattern.match(escaped) | |
1899 if targetmatch: | |
1900 break | |
1901 blockindex += 1 | |
1902 try: | |
1903 escaped += block[blockindex] | |
1904 except IndexError: | |
1905 raise MarkupError('malformed hyperlink target.') | |
1906 del block[:blockindex] | |
1907 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip() | |
1908 target = self.make_target(block, blocktext, lineno, | |
1909 targetmatch.group('name')) | |
1910 return [target], blank_finish | |
1911 | |
1912 def make_target(self, block, block_text, lineno, target_name): | |
1913 target_type, data = self.parse_target(block, block_text, lineno) | |
1914 if target_type == 'refname': | |
1915 target = nodes.target(block_text, '', refname=normalize_name(data)) | |
1916 target.indirect_reference_name = data | |
1917 self.add_target(target_name, '', target, lineno) | |
1918 self.document.note_indirect_target(target) | |
1919 return target | |
1920 elif target_type == 'refuri': | |
1921 target = nodes.target(block_text, '') | |
1922 self.add_target(target_name, data, target, lineno) | |
1923 return target | |
1924 else: | |
1925 return data | |
1926 | |
1927 def parse_target(self, block, block_text, lineno): | |
1928 """ | |
1929 Determine the type of reference of a target. | |
1930 | |
1931 :Return: A 2-tuple, one of: | |
1932 | |
1933 - 'refname' and the indirect reference name | |
1934 - 'refuri' and the URI | |
1935 - 'malformed' and a system_message node | |
1936 """ | |
1937 if block and block[-1].strip()[-1:] == '_': # possible indirect target | |
1938 reference = ' '.join([line.strip() for line in block]) | |
1939 refname = self.is_reference(reference) | |
1940 if refname: | |
1941 return 'refname', refname | |
1942 reference = ''.join([''.join(line.split()) for line in block]) | |
1943 return 'refuri', unescape(reference) | |
1944 | |
1945 def is_reference(self, reference): | |
1946 match = self.explicit.patterns.reference.match( | |
1947 whitespace_normalize_name(reference)) | |
1948 if not match: | |
1949 return None | |
1950 return unescape(match.group('simple') or match.group('phrase')) | |
1951 | |
1952 def add_target(self, targetname, refuri, target, lineno): | |
1953 target.line = lineno | |
1954 if targetname: | |
1955 name = normalize_name(unescape(targetname)) | |
1956 target['names'].append(name) | |
1957 if refuri: | |
1958 uri = self.inliner.adjust_uri(refuri) | |
1959 if uri: | |
1960 target['refuri'] = uri | |
1961 else: | |
1962 raise ApplicationError('problem with URI: %r' % refuri) | |
1963 self.document.note_explicit_target(target, self.parent) | |
1964 else: # anonymous target | |
1965 if refuri: | |
1966 target['refuri'] = refuri | |
1967 target['anonymous'] = 1 | |
1968 self.document.note_anonymous_target(target) | |
1969 | |
1970 def substitution_def(self, match): | |
1971 pattern = self.explicit.patterns.substitution | |
1972 src, srcline = self.state_machine.get_source_and_line() | |
1973 block, indent, offset, blank_finish = \ | |
1974 self.state_machine.get_first_known_indented(match.end(), | |
1975 strip_indent=False) | |
1976 blocktext = (match.string[:match.end()] + '\n'.join(block)) | |
1977 block.disconnect() | |
1978 escaped = escape2null(block[0].rstrip()) | |
1979 blockindex = 0 | |
1980 while True: | |
1981 subdefmatch = pattern.match(escaped) | |
1982 if subdefmatch: | |
1983 break | |
1984 blockindex += 1 | |
1985 try: | |
1986 escaped = escaped + ' ' + escape2null(block[blockindex].strip()) | |
1987 except IndexError: | |
1988 raise MarkupError('malformed substitution definition.') | |
1989 del block[:blockindex] # strip out the substitution marker | |
1990 block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1] | |
1991 if not block[0]: | |
1992 del block[0] | |
1993 offset += 1 | |
1994 while block and not block[-1].strip(): | |
1995 block.pop() | |
1996 subname = subdefmatch.group('name') | |
1997 substitution_node = nodes.substitution_definition(blocktext) | |
1998 substitution_node.source = src | |
1999 substitution_node.line = srcline | |
2000 if not block: | |
2001 msg = self.reporter.warning( | |
2002 'Substitution definition "%s" missing contents.' % subname, | |
2003 nodes.literal_block(blocktext, blocktext), | |
2004 source=src, line=srcline) | |
2005 return [msg], blank_finish | |
2006 block[0] = block[0].strip() | |
2007 substitution_node['names'].append( | |
2008 nodes.whitespace_normalize_name(subname)) | |
2009 new_abs_offset, blank_finish = self.nested_list_parse( | |
2010 block, input_offset=offset, node=substitution_node, | |
2011 initial_state='SubstitutionDef', blank_finish=blank_finish) | |
2012 i = 0 | |
2013 for node in substitution_node[:]: | |
2014 if not (isinstance(node, nodes.Inline) or | |
2015 isinstance(node, nodes.Text)): | |
2016 self.parent += substitution_node[i] | |
2017 del substitution_node[i] | |
2018 else: | |
2019 i += 1 | |
2020 for node in substitution_node.traverse(nodes.Element): | |
2021 if self.disallowed_inside_substitution_definitions(node): | |
2022 pformat = nodes.literal_block('', node.pformat().rstrip()) | |
2023 msg = self.reporter.error( | |
2024 'Substitution definition contains illegal element:', | |
2025 pformat, nodes.literal_block(blocktext, blocktext), | |
2026 source=src, line=srcline) | |
2027 return [msg], blank_finish | |
2028 if len(substitution_node) == 0: | |
2029 msg = self.reporter.warning( | |
2030 'Substitution definition "%s" empty or invalid.' % subname, | |
2031 nodes.literal_block(blocktext, blocktext), | |
2032 source=src, line=srcline) | |
2033 return [msg], blank_finish | |
2034 self.document.note_substitution_def( | |
2035 substitution_node, subname, self.parent) | |
2036 return [substitution_node], blank_finish | |
2037 | |
2038 def disallowed_inside_substitution_definitions(self, node): | |
2039 if (node['ids'] or | |
2040 isinstance(node, nodes.reference) and node.get('anonymous') or | |
2041 isinstance(node, nodes.footnote_reference) and node.get('auto')): | |
2042 return 1 | |
2043 else: | |
2044 return 0 | |
2045 | |
2046 def directive(self, match, **option_presets): | |
2047 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean.""" | |
2048 type_name = match.group(1) | |
2049 directive_class, messages = directives.directive( | |
2050 type_name, self.memo.language, self.document) | |
2051 self.parent += messages | |
2052 if directive_class: | |
2053 return self.run_directive( | |
2054 directive_class, match, type_name, option_presets) | |
2055 else: | |
2056 return self.unknown_directive(type_name) | |
2057 | |
2058 def run_directive(self, directive, match, type_name, option_presets): | |
2059 """ | |
2060 Parse a directive then run its directive function. | |
2061 | |
2062 Parameters: | |
2063 | |
2064 - `directive`: The class implementing the directive. Must be | |
2065 a subclass of `rst.Directive`. | |
2066 | |
2067 - `match`: A regular expression match object which matched the first | |
2068 line of the directive. | |
2069 | |
2070 - `type_name`: The directive name, as used in the source text. | |
2071 | |
2072 - `option_presets`: A dictionary of preset options, defaults for the | |
2073 directive options. Currently, only an "alt" option is passed by | |
2074 substitution definitions (value: the substitution name), which may | |
2075 be used by an embedded image directive. | |
2076 | |
2077 Returns a 2-tuple: list of nodes, and a "blank finish" boolean. | |
2078 """ | |
2079 if isinstance(directive, (FunctionType, MethodType)): | |
2080 from docutils.parsers.rst import convert_directive_function | |
2081 directive = convert_directive_function(directive) | |
2082 lineno = self.state_machine.abs_line_number() | |
2083 initial_line_offset = self.state_machine.line_offset | |
2084 indented, indent, line_offset, blank_finish \ | |
2085 = self.state_machine.get_first_known_indented(match.end(), | |
2086 strip_top=0) | |
2087 block_text = '\n'.join(self.state_machine.input_lines[ | |
2088 initial_line_offset : self.state_machine.line_offset + 1]) | |
2089 try: | |
2090 arguments, options, content, content_offset = ( | |
2091 self.parse_directive_block(indented, line_offset, | |
2092 directive, option_presets)) | |
2093 except MarkupError, detail: | |
2094 error = self.reporter.error( | |
2095 'Error in "%s" directive:\n%s.' % (type_name, | |
2096 ' '.join(detail.args)), | |
2097 nodes.literal_block(block_text, block_text), line=lineno) | |
2098 return [error], blank_finish | |
2099 directive_instance = directive( | |
2100 type_name, arguments, options, content, lineno, | |
2101 content_offset, block_text, self, self.state_machine) | |
2102 try: | |
2103 result = directive_instance.run() | |
2104 except docutils.parsers.rst.DirectiveError, error: | |
2105 msg_node = self.reporter.system_message(error.level, error.msg, | |
2106 line=lineno) | |
2107 msg_node += nodes.literal_block(block_text, block_text) | |
2108 result = [msg_node] | |
2109 assert isinstance(result, list), \ | |
2110 'Directive "%s" must return a list of nodes.' % type_name | |
2111 for i in range(len(result)): | |
2112 assert isinstance(result[i], nodes.Node), \ | |
2113 ('Directive "%s" returned non-Node object (index %s): %r' | |
2114 % (type_name, i, result[i])) | |
2115 return (result, | |
2116 blank_finish or self.state_machine.is_next_line_blank()) | |
2117 | |
2118 def parse_directive_block(self, indented, line_offset, directive, | |
2119 option_presets): | |
2120 option_spec = directive.option_spec | |
2121 has_content = directive.has_content | |
2122 if indented and not indented[0].strip(): | |
2123 indented.trim_start() | |
2124 line_offset += 1 | |
2125 while indented and not indented[-1].strip(): | |
2126 indented.trim_end() | |
2127 if indented and (directive.required_arguments | |
2128 or directive.optional_arguments | |
2129 or option_spec): | |
2130 for i, line in enumerate(indented): | |
2131 if not line.strip(): | |
2132 break | |
2133 else: | |
2134 i += 1 | |
2135 arg_block = indented[:i] | |
2136 content = indented[i+1:] | |
2137 content_offset = line_offset + i + 1 | |
2138 else: | |
2139 content = indented | |
2140 content_offset = line_offset | |
2141 arg_block = [] | |
2142 if option_spec: | |
2143 options, arg_block = self.parse_directive_options( | |
2144 option_presets, option_spec, arg_block) | |
2145 else: | |
2146 options = {} | |
2147 if arg_block and not (directive.required_arguments | |
2148 or directive.optional_arguments): | |
2149 content = arg_block + indented[i:] | |
2150 content_offset = line_offset | |
2151 arg_block = [] | |
2152 while content and not content[0].strip(): | |
2153 content.trim_start() | |
2154 content_offset += 1 | |
2155 if directive.required_arguments or directive.optional_arguments: | |
2156 arguments = self.parse_directive_arguments( | |
2157 directive, arg_block) | |
2158 else: | |
2159 arguments = [] | |
2160 if content and not has_content: | |
2161 raise MarkupError('no content permitted') | |
2162 return (arguments, options, content, content_offset) | |
2163 | |
2164 def parse_directive_options(self, option_presets, option_spec, arg_block): | |
2165 options = option_presets.copy() | |
2166 for i, line in enumerate(arg_block): | |
2167 if re.match(Body.patterns['field_marker'], line): | |
2168 opt_block = arg_block[i:] | |
2169 arg_block = arg_block[:i] | |
2170 break | |
2171 else: | |
2172 opt_block = [] | |
2173 if opt_block: | |
2174 success, data = self.parse_extension_options(option_spec, | |
2175 opt_block) | |
2176 if success: # data is a dict of options | |
2177 options.update(data) | |
2178 else: # data is an error string | |
2179 raise MarkupError(data) | |
2180 return options, arg_block | |
2181 | |
2182 def parse_directive_arguments(self, directive, arg_block): | |
2183 required = directive.required_arguments | |
2184 optional = directive.optional_arguments | |
2185 arg_text = '\n'.join(arg_block) | |
2186 arguments = arg_text.split() | |
2187 if len(arguments) < required: | |
2188 raise MarkupError('%s argument(s) required, %s supplied' | |
2189 % (required, len(arguments))) | |
2190 elif len(arguments) > required + optional: | |
2191 if directive.final_argument_whitespace: | |
2192 arguments = arg_text.split(None, required + optional - 1) | |
2193 else: | |
2194 raise MarkupError( | |
2195 'maximum %s argument(s) allowed, %s supplied' | |
2196 % (required + optional, len(arguments))) | |
2197 return arguments | |
2198 | |
2199 def parse_extension_options(self, option_spec, datalines): | |
2200 """ | |
2201 Parse `datalines` for a field list containing extension options | |
2202 matching `option_spec`. | |
2203 | |
2204 :Parameters: | |
2205 - `option_spec`: a mapping of option name to conversion | |
2206 function, which should raise an exception on bad input. | |
2207 - `datalines`: a list of input strings. | |
2208 | |
2209 :Return: | |
2210 - Success value, 1 or 0. | |
2211 - An option dictionary on success, an error string on failure. | |
2212 """ | |
2213 node = nodes.field_list() | |
2214 newline_offset, blank_finish = self.nested_list_parse( | |
2215 datalines, 0, node, initial_state='ExtensionOptions', | |
2216 blank_finish=True) | |
2217 if newline_offset != len(datalines): # incomplete parse of block | |
2218 return 0, 'invalid option block' | |
2219 try: | |
2220 options = utils.extract_extension_options(node, option_spec) | |
2221 except KeyError, detail: | |
2222 return 0, ('unknown option: "%s"' % detail.args[0]) | |
2223 except (ValueError, TypeError), detail: | |
2224 return 0, ('invalid option value: %s' % ' '.join(detail.args)) | |
2225 except utils.ExtensionOptionError, detail: | |
2226 return 0, ('invalid option data: %s' % ' '.join(detail.args)) | |
2227 if blank_finish: | |
2228 return 1, options | |
2229 else: | |
2230 return 0, 'option data incompletely parsed' | |
2231 | |
2232 def unknown_directive(self, type_name): | |
2233 lineno = self.state_machine.abs_line_number() | |
2234 indented, indent, offset, blank_finish = \ | |
2235 self.state_machine.get_first_known_indented(0, strip_indent=False) | |
2236 text = '\n'.join(indented) | |
2237 error = self.reporter.error( | |
2238 'Unknown directive type "%s".' % type_name, | |
2239 nodes.literal_block(text, text), line=lineno) | |
2240 return [error], blank_finish | |
2241 | |
2242 def comment(self, match): | |
2243 if not match.string[match.end():].strip() \ | |
2244 and self.state_machine.is_next_line_blank(): # an empty comment? | |
2245 return [nodes.comment()], 1 # "A tiny but practical wart." | |
2246 indented, indent, offset, blank_finish = \ | |
2247 self.state_machine.get_first_known_indented(match.end()) | |
2248 while indented and not indented[-1].strip(): | |
2249 indented.trim_end() | |
2250 text = '\n'.join(indented) | |
2251 return [nodes.comment(text, text)], blank_finish | |
2252 | |
2253 explicit.constructs = [ | |
2254 (footnote, | |
2255 re.compile(r""" | |
2256 \.\.[ ]+ # explicit markup start | |
2257 \[ | |
2258 ( # footnote label: | |
2259 [0-9]+ # manually numbered footnote | |
2260 | # *OR* | |
2261 \# # anonymous auto-numbered footnote | |
2262 | # *OR* | |
2263 \#%s # auto-numbered footnote with label | |
2264 | # *OR* | |
2265 \* # auto-symbol footnote | |
2266 ) | |
2267 \] | |
2268 ([ ]+|$) # whitespace or end of line | |
2269 """ % Inliner.simplename, re.VERBOSE | re.UNICODE)), | |
2270 (citation, | |
2271 re.compile(r""" | |
2272 \.\.[ ]+ # explicit markup start | |
2273 \[(%s)\] # citation label | |
2274 ([ ]+|$) # whitespace or end of line | |
2275 """ % Inliner.simplename, re.VERBOSE | re.UNICODE)), | |
2276 (hyperlink_target, | |
2277 re.compile(r""" | |
2278 \.\.[ ]+ # explicit markup start | |
2279 _ # target indicator | |
2280 (?![ ]|$) # first char. not space or EOL | |
2281 """, re.VERBOSE | re.UNICODE)), | |
2282 (substitution_def, | |
2283 re.compile(r""" | |
2284 \.\.[ ]+ # explicit markup start | |
2285 \| # substitution indicator | |
2286 (?![ ]|$) # first char. not space or EOL | |
2287 """, re.VERBOSE | re.UNICODE)), | |
2288 (directive, | |
2289 re.compile(r""" | |
2290 \.\.[ ]+ # explicit markup start | |
2291 (%s) # directive name | |
2292 [ ]? # optional space | |
2293 :: # directive delimiter | |
2294 ([ ]+|$) # whitespace or end of line | |
2295 """ % Inliner.simplename, re.VERBOSE | re.UNICODE))] | |
2296 | |
2297 def explicit_markup(self, match, context, next_state): | |
2298 """Footnotes, hyperlink targets, directives, comments.""" | |
2299 nodelist, blank_finish = self.explicit_construct(match) | |
2300 self.parent += nodelist | |
2301 self.explicit_list(blank_finish) | |
2302 return [], next_state, [] | |
2303 | |
2304 def explicit_construct(self, match): | |
2305 """Determine which explicit construct this is, parse & return it.""" | |
2306 errors = [] | |
2307 for method, pattern in self.explicit.constructs: | |
2308 expmatch = pattern.match(match.string) | |
2309 if expmatch: | |
2310 try: | |
2311 return method(self, expmatch) | |
2312 except MarkupError, error: | |
2313 lineno = self.state_machine.abs_line_number() | |
2314 message = ' '.join(error.args) | |
2315 errors.append(self.reporter.warning(message, line=lineno)) | |
2316 break | |
2317 nodelist, blank_finish = self.comment(match) | |
2318 return nodelist + errors, blank_finish | |
2319 | |
2320 def explicit_list(self, blank_finish): | |
2321 """ | |
2322 Create a nested state machine for a series of explicit markup | |
2323 constructs (including anonymous hyperlink targets). | |
2324 """ | |
2325 offset = self.state_machine.line_offset + 1 # next line | |
2326 newline_offset, blank_finish = self.nested_list_parse( | |
2327 self.state_machine.input_lines[offset:], | |
2328 input_offset=self.state_machine.abs_line_offset() + 1, | |
2329 node=self.parent, initial_state='Explicit', | |
2330 blank_finish=blank_finish, | |
2331 match_titles=self.state_machine.match_titles) | |
2332 self.goto_line(newline_offset) | |
2333 if not blank_finish: | |
2334 self.parent += self.unindent_warning('Explicit markup') | |
2335 | |
2336 def anonymous(self, match, context, next_state): | |
2337 """Anonymous hyperlink targets.""" | |
2338 nodelist, blank_finish = self.anonymous_target(match) | |
2339 self.parent += nodelist | |
2340 self.explicit_list(blank_finish) | |
2341 return [], next_state, [] | |
2342 | |
2343 def anonymous_target(self, match): | |
2344 lineno = self.state_machine.abs_line_number() | |
2345 block, indent, offset, blank_finish \ | |
2346 = self.state_machine.get_first_known_indented(match.end(), | |
2347 until_blank=True) | |
2348 blocktext = match.string[:match.end()] + '\n'.join(block) | |
2349 block = [escape2null(line) for line in block] | |
2350 target = self.make_target(block, blocktext, lineno, '') | |
2351 return [target], blank_finish | |
2352 | |
2353 def line(self, match, context, next_state): | |
2354 """Section title overline or transition marker.""" | |
2355 if self.state_machine.match_titles: | |
2356 return [match.string], 'Line', [] | |
2357 elif match.string.strip() == '::': | |
2358 raise statemachine.TransitionCorrection('text') | |
2359 elif len(match.string.strip()) < 4: | |
2360 msg = self.reporter.info( | |
2361 'Unexpected possible title overline or transition.\n' | |
2362 "Treating it as ordinary text because it's so short.", | |
2363 line=self.state_machine.abs_line_number()) | |
2364 self.parent += msg | |
2365 raise statemachine.TransitionCorrection('text') | |
2366 else: | |
2367 blocktext = self.state_machine.line | |
2368 msg = self.reporter.severe( | |
2369 'Unexpected section title or transition.', | |
2370 nodes.literal_block(blocktext, blocktext), | |
2371 line=self.state_machine.abs_line_number()) | |
2372 self.parent += msg | |
2373 return [], next_state, [] | |
2374 | |
2375 def text(self, match, context, next_state): | |
2376 """Titles, definition lists, paragraphs.""" | |
2377 return [match.string], 'Text', [] | |
2378 | |
2379 | |
2380 class RFC2822Body(Body): | |
2381 | |
2382 """ | |
2383 RFC2822 headers are only valid as the first constructs in documents. As | |
2384 soon as anything else appears, the `Body` state should take over. | |
2385 """ | |
2386 | |
2387 patterns = Body.patterns.copy() # can't modify the original | |
2388 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)' | |
2389 initial_transitions = [(name, 'Body') | |
2390 for name in Body.initial_transitions] | |
2391 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text' | |
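# The 'rfc2822' pattern matches header-style lines such as
# "Author: J. Random Hacker" or "Organization:" at the very start of a
# document; anything else falls through to the ordinary Body transitions.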
2392 | |
2393 def rfc2822(self, match, context, next_state): | |
2394 """RFC2822-style field list item.""" | |
2395 fieldlist = nodes.field_list(classes=['rfc2822']) | |
2396 self.parent += fieldlist | |
2397 field, blank_finish = self.rfc2822_field(match) | |
2398 fieldlist += field | |
2399 offset = self.state_machine.line_offset + 1 # next line | |
2400 newline_offset, blank_finish = self.nested_list_parse( | |
2401 self.state_machine.input_lines[offset:], | |
2402 input_offset=self.state_machine.abs_line_offset() + 1, | |
2403 node=fieldlist, initial_state='RFC2822List', | |
2404 blank_finish=blank_finish) | |
2405 self.goto_line(newline_offset) | |
2406 if not blank_finish: | |
2407 self.parent += self.unindent_warning( | |
2408 'RFC2822-style field list') | |
2409 return [], next_state, [] | |
2410 | |
2411 def rfc2822_field(self, match): | |
2412 name = match.string[:match.string.find(':')] | |
2413 indented, indent, line_offset, blank_finish = \ | |
2414 self.state_machine.get_first_known_indented(match.end(), | |
2415 until_blank=True) | |
2416 fieldnode = nodes.field() | |
2417 fieldnode += nodes.field_name(name, name) | |
2418 fieldbody = nodes.field_body('\n'.join(indented)) | |
2419 fieldnode += fieldbody | |
2420 if indented: | |
2421 self.nested_parse(indented, input_offset=line_offset, | |
2422 node=fieldbody) | |
2423 return fieldnode, blank_finish | |
2424 | |
2425 | |
2426 class SpecializedBody(Body): | |
2427 | |
2428 """ | |
2429 Superclass for second and subsequent compound element members. Compound | |
2430 elements are lists and list-like constructs. | |
2431 | |
2432 All transition methods are disabled (redefined as `invalid_input`). | |
2433 Override individual methods in subclasses to re-enable. | |
2434 | |
2435 For example, once an initial bullet list item, say, is recognized, the | |
2436 `BulletList` subclass takes over, with a "bullet_list" node as its | |
2437 container. Upon encountering the initial bullet list item, `Body.bullet` | |
2438 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which | |
2439 starts up a nested parsing session with `BulletList` as the initial state. | |
2440 Only the ``bullet`` transition method is enabled in `BulletList`; as long | |
2441 as only bullet list items are encountered, they are parsed and inserted | |
2442 into the container. The first construct which is *not* a bullet list item | |
2443 triggers the `invalid_input` method, which ends the nested parse and | |
2444 closes the container. `BulletList` needs to recognize input that is | |
2445 invalid in the context of a bullet list, which means everything *other | |
2446 than* bullet list items, so it inherits the transition list created in | |
2447 `Body`. | |
2448 """ | |
2449 | |
2450 def invalid_input(self, match=None, context=None, next_state=None): | |
2451 """Not a compound element member. Abort this state machine.""" | |
2452 self.state_machine.previous_line() # back up so parent SM can reassess | |
2453 raise EOFError | |
2454 | |
2455 indent = invalid_input | |
2456 bullet = invalid_input | |
2457 enumerator = invalid_input | |
2458 field_marker = invalid_input | |
2459 option_marker = invalid_input | |
2460 doctest = invalid_input | |
2461 line_block = invalid_input | |
2462 grid_table_top = invalid_input | |
2463 simple_table_top = invalid_input | |
2464 explicit_markup = invalid_input | |
2465 anonymous = invalid_input | |
2466 line = invalid_input | |
2467 text = invalid_input | |
2468 | |
2469 | |
2470 class BulletList(SpecializedBody): | |
2471 | |
2472 """Second and subsequent bullet_list list_items.""" | |
2473 | |
2474 def bullet(self, match, context, next_state): | |
2475 """Bullet list item.""" | |
2476 if match.string[0] != self.parent['bullet']: | |
2477 # different bullet: new list | |
2478 self.invalid_input() | |
2479 listitem, blank_finish = self.list_item(match.end()) | |
2480 self.parent += listitem | |
2481 self.blank_finish = blank_finish | |
2482 return [], next_state, [] | |
2483 | |
2484 | |
2485 class DefinitionList(SpecializedBody): | |
2486 | |
2487 """Second and subsequent definition_list_items.""" | |
2488 | |
2489 def text(self, match, context, next_state): | |
2490 """Definition lists.""" | |
2491 return [match.string], 'Definition', [] | |
2492 | |
2493 | |
2494 class EnumeratedList(SpecializedBody): | |
2495 | |
2496 """Second and subsequent enumerated_list list_items.""" | |
2497 | |
2498 def enumerator(self, match, context, next_state): | |
2499 """Enumerated list item.""" | |
2500 format, sequence, text, ordinal = self.parse_enumerator( | |
2501 match, self.parent['enumtype']) | |
2502 if ( format != self.format | |
2503 or (sequence != '#' and (sequence != self.parent['enumtype'] | |
2504 or self.auto | |
2505 or ordinal != (self.lastordinal + 1))) | |
2506 or not self.is_enumerated_list_item(ordinal, sequence, format)): | |
2507 # different enumeration: new list | |
2508 self.invalid_input() | |
2509 if sequence == '#': | |
2510 self.auto = 1 | |
2511 listitem, blank_finish = self.list_item(match.end()) | |
2512 self.parent += listitem | |
2513 self.blank_finish = blank_finish | |
2514 self.lastordinal = ordinal | |
2515 return [], next_state, [] | |
2516 | |
2517 | |
2518 class FieldList(SpecializedBody): | |
2519 | |
2520 """Second and subsequent field_list fields.""" | |
2521 | |
2522 def field_marker(self, match, context, next_state): | |
2523 """Field list field.""" | |
2524 field, blank_finish = self.field(match) | |
2525 self.parent += field | |
2526 self.blank_finish = blank_finish | |
2527 return [], next_state, [] | |
2528 | |
2529 | |
2530 class OptionList(SpecializedBody): | |
2531 | |
2532 """Second and subsequent option_list option_list_items.""" | |
2533 | |
2534 def option_marker(self, match, context, next_state): | |
2535 """Option list item.""" | |
2536 try: | |
2537 option_list_item, blank_finish = self.option_list_item(match) | |
2538 except MarkupError: | |
2539 self.invalid_input() | |
2540 self.parent += option_list_item | |
2541 self.blank_finish = blank_finish | |
2542 return [], next_state, [] | |
2543 | |
2544 | |
2545 class RFC2822List(SpecializedBody, RFC2822Body): | |
2546 | |
2547 """Second and subsequent RFC2822-style field_list fields.""" | |
2548 | |
2549 patterns = RFC2822Body.patterns | |
2550 initial_transitions = RFC2822Body.initial_transitions | |
2551 | |
2552 def rfc2822(self, match, context, next_state): | |
2553 """RFC2822-style field list item.""" | |
2554 field, blank_finish = self.rfc2822_field(match) | |
2555 self.parent += field | |
2556 self.blank_finish = blank_finish | |
2557 return [], 'RFC2822List', [] | |
2558 | |
2559 blank = SpecializedBody.invalid_input | |
2560 | |
2561 | |
2562 class ExtensionOptions(FieldList): | |
2563 | |
2564 """ | |
2565 Parse field_list fields for extension options. | |
2566 | |
2567 No nested parsing is done (including inline markup parsing). | |
2568 """ | |
2569 | |
2570 def parse_field_body(self, indented, offset, node): | |
2571 """Override `Body.parse_field_body` for simpler parsing.""" | |
2572 lines = [] | |
2573 for line in list(indented) + ['']: | |
2574 if line.strip(): | |
2575 lines.append(line) | |
2576 elif lines: | |
2577 text = '\n'.join(lines) | |
2578 node += nodes.paragraph(text, text) | |
2579 lines = [] | |
2580 | |
2581 | |
2582 class LineBlock(SpecializedBody): | |
2583 | |
2584 """Second and subsequent lines of a line_block.""" | |
2585 | |
2586 blank = SpecializedBody.invalid_input | |
2587 | |
2588 def line_block(self, match, context, next_state): | |
2589 """New line of line block.""" | |
2590 lineno = self.state_machine.abs_line_number() | |
2591 line, messages, blank_finish = self.line_block_line(match, lineno) | |
2592 self.parent += line | |
2593 self.parent.parent += messages | |
2594 self.blank_finish = blank_finish | |
2595 return [], next_state, [] | |
2596 | |
2597 | |
2598 class Explicit(SpecializedBody): | |
2599 | |
2600 """Second and subsequent explicit markup construct.""" | |
2601 | |
2602 def explicit_markup(self, match, context, next_state): | |
2603 """Footnotes, hyperlink targets, directives, comments.""" | |
2604 nodelist, blank_finish = self.explicit_construct(match) | |
2605 self.parent += nodelist | |
2606 self.blank_finish = blank_finish | |
2607 return [], next_state, [] | |
2608 | |
2609 def anonymous(self, match, context, next_state): | |
2610 """Anonymous hyperlink targets.""" | |
2611 nodelist, blank_finish = self.anonymous_target(match) | |
2612 self.parent += nodelist | |
2613 self.blank_finish = blank_finish | |
2614 return [], next_state, [] | |
2615 | |
2616 blank = SpecializedBody.invalid_input | |
2617 | |
2618 | |
2619 class SubstitutionDef(Body): | |
2620 | |
2621 """ | |
2622 Parser for the contents of a substitution_definition element. | |
2623 """ | |
2624 | |
2625 patterns = { | |
2626 'embedded_directive': re.compile(r'(%s)::( +|$)' | |
2627 % Inliner.simplename, re.UNICODE), | |
2628 'text': r''} | |
2629 initial_transitions = ['embedded_directive', 'text'] | |
2630 | |
2631 def embedded_directive(self, match, context, next_state): | |
2632 nodelist, blank_finish = self.directive(match, | |
2633 alt=self.parent['names'][0]) | |
2634 self.parent += nodelist | |
2635 if not self.state_machine.at_eof(): | |
2636 self.blank_finish = blank_finish | |
2637 raise EOFError | |
2638 | |
2639 def text(self, match, context, next_state): | |
2640 if not self.state_machine.at_eof(): | |
2641 self.blank_finish = self.state_machine.is_next_line_blank() | |
2642 raise EOFError | |
2643 | |
2644 | |
2645 class Text(RSTState): | |
2646 | |
2647 """ | |
2648 Classifier of second line of a text block. | |
2649 | |
2650 Could be a paragraph, a definition list item, or a title. | |
2651 """ | |
2652 | |
2653 patterns = {'underline': Body.patterns['line'], | |
2654 'text': r''} | |
2655 initial_transitions = [('underline', 'Body'), ('text', 'Body')] | |
2656 | |
2657 def blank(self, match, context, next_state): | |
2658 """End of paragraph.""" | |
2659 # NOTE: self.paragraph returns [ node, system_message(s) ], literalnext | |
2660 paragraph, literalnext = self.paragraph( | |
2661 context, self.state_machine.abs_line_number() - 1) | |
2662 self.parent += paragraph | |
2663 if literalnext: | |
2664 self.parent += self.literal_block() | |
2665 return [], 'Body', [] | |
2666 | |
2667 def eof(self, context): | |
2668 if context: | |
2669 self.blank(None, context, None) | |
2670 return [] | |
2671 | |
2672 def indent(self, match, context, next_state): | |
2673 """Definition list item.""" | |
2674 definitionlist = nodes.definition_list() | |
2675 definitionlistitem, blank_finish = self.definition_list_item(context) | |
2676 definitionlist += definitionlistitem | |
2677 self.parent += definitionlist | |
2678 offset = self.state_machine.line_offset + 1 # next line | |
2679 newline_offset, blank_finish = self.nested_list_parse( | |
2680 self.state_machine.input_lines[offset:], | |
2681 input_offset=self.state_machine.abs_line_offset() + 1, | |
2682 node=definitionlist, initial_state='DefinitionList', | |
2683 blank_finish=blank_finish, blank_finish_state='Definition') | |
2684 self.goto_line(newline_offset) | |
2685 if not blank_finish: | |
2686 self.parent += self.unindent_warning('Definition list') | |
2687 return [], 'Body', [] | |
2688 | |
2689 def underline(self, match, context, next_state): | |
2690 """Section title.""" | |
2691 lineno = self.state_machine.abs_line_number() | |
2692 title = context[0].rstrip() | |
2693 underline = match.string.rstrip() | |
2694 source = title + '\n' + underline | |
2695 messages = [] | |
2696 if column_width(title) > len(underline): | |
2697 if len(underline) < 4: | |
2698 if self.state_machine.match_titles: | |
2699 msg = self.reporter.info( | |
2700 'Possible title underline, too short for the title.\n' | |
2701 "Treating it as ordinary text because it's so short.", | |
2702 line=lineno) | |
2703 self.parent += msg | |
2704 raise statemachine.TransitionCorrection('text') | |
2705 else: | |
2706 blocktext = context[0] + '\n' + self.state_machine.line | |
2707 msg = self.reporter.warning('Title underline too short.', | |
2708 nodes.literal_block(blocktext, blocktext), line=lineno) | |
2709 messages.append(msg) | |
2710 if not self.state_machine.match_titles: | |
2711 blocktext = context[0] + '\n' + self.state_machine.line | |
2712 # We need get_source_and_line() here to report correctly | |
2713 src, srcline = self.state_machine.get_source_and_line() | |
2714 # TODO: why is abs_line_number() == srcline+1 | |
2715 # if the error is in a table (try with test_tables.py)? | |
2716 # print "get_source_and_line", srcline | |
2717 # print "abs_line_number", self.state_machine.abs_line_number() | |
2718 msg = self.reporter.severe('Unexpected section title.', | |
2719 nodes.literal_block(blocktext, blocktext), | |
2720 source=src, line=srcline) | |
2721 self.parent += messages | |
2722 self.parent += msg | |
2723 return [], next_state, [] | |
2724 style = underline[0] | |
2725 context[:] = [] | |
2726 self.section(title, source, style, lineno - 1, messages) | |
2727 return [], next_state, [] | |
2728 | |
2729 def text(self, match, context, next_state): | |
2730 """Paragraph.""" | |
2731 startline = self.state_machine.abs_line_number() - 1 | |
2732 msg = None | |
2733 try: | |
2734 block = self.state_machine.get_text_block(flush_left=True) | |
2735 except statemachine.UnexpectedIndentationError, err: | |
2736 block, src, srcline = err.args | |
2737 msg = self.reporter.error('Unexpected indentation.', | |
2738 source=src, line=srcline) | |
2739 lines = context + list(block) | |
2740 paragraph, literalnext = self.paragraph(lines, startline) | |
2741 self.parent += paragraph | |
2742 self.parent += msg | |
2743 if literalnext: | |
2744 try: | |
2745 self.state_machine.next_line() | |
2746 except EOFError: | |
2747 pass | |
2748 self.parent += self.literal_block() | |
2749 return [], next_state, [] | |
2750 | |
2751 def literal_block(self): | |
2752 """Return a list of nodes.""" | |
2753 indented, indent, offset, blank_finish = \ | |
2754 self.state_machine.get_indented() | |
2755 while indented and not indented[-1].strip(): | |
2756 indented.trim_end() | |
2757 if not indented: | |
2758 return self.quoted_literal_block() | |
2759 data = '\n'.join(indented) | |
2760 literal_block = nodes.literal_block(data, data) | |
2761 literal_block.line = offset + 1 | |
2762 nodelist = [literal_block] | |
2763 if not blank_finish: | |
2764 nodelist.append(self.unindent_warning('Literal block')) | |
2765 return nodelist | |
2766 | |
2767 def quoted_literal_block(self): | |
2768 abs_line_offset = self.state_machine.abs_line_offset() | |
2769 offset = self.state_machine.line_offset | |
2770 parent_node = nodes.Element() | |
2771 new_abs_offset = self.nested_parse( | |
2772 self.state_machine.input_lines[offset:], | |
2773 input_offset=abs_line_offset, node=parent_node, match_titles=False, | |
2774 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,), | |
2775 'initial_state': 'QuotedLiteralBlock'}) | |
2776 self.goto_line(new_abs_offset) | |
2777 return parent_node.children | |
2778 | |
2779 def definition_list_item(self, termline): | |
2780 indented, indent, line_offset, blank_finish = \ | |
2781 self.state_machine.get_indented() | |
2782 itemnode = nodes.definition_list_item( | |
2783 '\n'.join(termline + list(indented))) | |
2784 lineno = self.state_machine.abs_line_number() - 1 | |
2785 (itemnode.source, | |
2786 itemnode.line) = self.state_machine.get_source_and_line(lineno) | |
2787 termlist, messages = self.term(termline, lineno) | |
2788 itemnode += termlist | |
2789 definition = nodes.definition('', *messages) | |
2790 itemnode += definition | |
2791 if termline[0][-2:] == '::': | |
2792 definition += self.reporter.info( | |
2793 'Blank line missing before literal block (after the "::")? ' | |
2794 'Interpreted as a definition list item.', | |
2795 line=lineno+1) | |
2796 self.nested_parse(indented, input_offset=line_offset, node=definition) | |
2797 return itemnode, blank_finish | |
2798 | |
2799 classifier_delimiter = re.compile(' +: +') | |
2800 | |
2801 def term(self, lines, lineno): | |
2802 """Return a definition_list's term and optional classifiers.""" | |
2803 assert len(lines) == 1 | |
2804 text_nodes, messages = self.inline_text(lines[0], lineno) | |
2805 term_node = nodes.term() | |
2806 (term_node.source, | |
2807 term_node.line) = self.state_machine.get_source_and_line(lineno) | |
2808 term_node.rawsource = unescape(lines[0]) | |
2809 node_list = [term_node] | |
2810 for i in range(len(text_nodes)): | |
2811 node = text_nodes[i] | |
2812 if isinstance(node, nodes.Text): | |
2813 parts = self.classifier_delimiter.split(node.rawsource) | |
2814 if len(parts) == 1: | |
2815 node_list[-1] += node | |
2816 else: | |
2817 | |
2818 node_list[-1] += nodes.Text(parts[0].rstrip()) | |
2819 for part in parts[1:]: | |
2820 classifier_node = nodes.classifier('', part) | |
2821 node_list.append(classifier_node) | |
2822 else: | |
2823 node_list[-1] += node | |
2824 return node_list, messages | |
2825 | |
2826 | |
2827 class SpecializedText(Text): | |
2828 | |
2829 """ | |
2830 Superclass for second and subsequent lines of Text-variants. | |
2831 | |
2832 All transition methods are disabled. Override individual methods in | |
2833 subclasses to re-enable. | |
2834 """ | |
2835 | |
2836 def eof(self, context): | |
2837 """Incomplete construct.""" | |
2838 return [] | |
2839 | |
2840 def invalid_input(self, match=None, context=None, next_state=None): | |
2841 """Not a compound element member. Abort this state machine.""" | |
2842 raise EOFError | |
2843 | |
2844 blank = invalid_input | |
2845 indent = invalid_input | |
2846 underline = invalid_input | |
2847 text = invalid_input | |
2848 | |
2849 | |
2850 class Definition(SpecializedText): | |
2851 | |
2852 """Second line of potential definition_list_item.""" | |
2853 | |
2854 def eof(self, context): | |
2855 """Not a definition.""" | |
2856 self.state_machine.previous_line(2) # so parent SM can reassess | |
2857 return [] | |
2858 | |
2859 def indent(self, match, context, next_state): | |
2860 """Definition list item.""" | |
2861 itemnode, blank_finish = self.definition_list_item(context) | |
2862 self.parent += itemnode | |
2863 self.blank_finish = blank_finish | |
2864 return [], 'DefinitionList', [] | |
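# Returning 'DefinitionList' hands control to the DefinitionList state,
# which collects any further term/definition pairs of the same list.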
2865 | |
2866 | |
2867 class Line(SpecializedText): | |
2868 | |
2869 """ | |
2870 Second line of over- & underlined section title or transition marker. | |
2871 """ | |
2872 | |
2873 eofcheck = 1 # see docstring below: cleared while a section is being parsed | |
2874 """Set to 0 while parsing sections, so that we don't catch the EOF.""" | |
2875 | |
2876 def eof(self, context): | |
2877 """Transition marker at end of section or document.""" | |
2878 marker = context[0].strip() | |
2879 if self.memo.section_bubble_up_kludge: | |
2880 self.memo.section_bubble_up_kludge = False | |
2881 elif len(marker) < 4: | |
2882 self.state_correction(context) | |
2883 if self.eofcheck: # ignore EOFError with sections | |
2884 lineno = self.state_machine.abs_line_number() - 1 | |
2885 transition = nodes.transition(rawsource=context[0]) | |
2886 transition.line = lineno | |
2887 self.parent += transition | |
2888 self.eofcheck = 1 | |
2889 return [] | |
2890 | |
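# A transition marker is a line of four or more repeated punctuation
# characters, set off by blank lines, e.g. (illustrative input):
#
#     ----------
#
# blank() below turns such a marker into a transition node.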
2891 def blank(self, match, context, next_state): | |
2892 """Transition marker.""" | |
2893 src, srcline = self.state_machine.get_source_and_line() | |
2894 marker = context[0].strip() | |
2895 if len(marker) < 4: | |
2896 self.state_correction(context) | |
2897 transition = nodes.transition(rawsource=marker) | |
2898 transition.source = src | |
2899 transition.line = srcline - 1 | |
2900 self.parent += transition | |
2901 return [], 'Body', [] | |
2902 | |
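# An over- and underlined section title looks like (illustrative input):
#
#     =========
#      Chapter
#     =========
#
# text() below checks that the overline and underline match and are long
# enough before creating the section.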
2903 def text(self, match, context, next_state): | |
2904 """Potential over- & underlined title.""" | |
2905 lineno = self.state_machine.abs_line_number() - 1 | |
2906 overline = context[0] | |
2907 title = match.string | |
2908 underline = '' | |
2909 try: | |
2910 underline = self.state_machine.next_line() | |
2911 except EOFError: | |
2912 blocktext = overline + '\n' + title | |
2913 if len(overline.rstrip()) < 4: | |
2914 self.short_overline(context, blocktext, lineno, 2) | |
2915 else: | |
2916 msg = self.reporter.severe( | |
2917 'Incomplete section title.', | |
2918 nodes.literal_block(blocktext, blocktext), | |
2919 line=lineno) | |
2920 self.parent += msg | |
2921 return [], 'Body', [] | |
2922 source = '%s\n%s\n%s' % (overline, title, underline) | |
2923 overline = overline.rstrip() | |
2924 underline = underline.rstrip() | |
2925 if not self.transitions['underline'][0].match(underline): | |
2926 blocktext = overline + '\n' + title + '\n' + underline | |
2927 if len(overline.rstrip()) < 4: | |
2928 self.short_overline(context, blocktext, lineno, 2) | |
2929 else: | |
2930 msg = self.reporter.severe( | |
2931 'Missing matching underline for section title overline.', | |
2932 nodes.literal_block(source, source), | |
2933 line=lineno) | |
2934 self.parent += msg | |
2935 return [], 'Body', [] | |
2936 elif overline != underline: | |
2937 blocktext = overline + '\n' + title + '\n' + underline | |
2938 if len(overline.rstrip()) < 4: | |
2939 self.short_overline(context, blocktext, lineno, 2) | |
2940 else: | |
2941 msg = self.reporter.severe( | |
2942 'Title overline & underline mismatch.', | |
2943 nodes.literal_block(source, source), | |
2944 line=lineno) | |
2945 self.parent += msg | |
2946 return [], 'Body', [] | |
2947 title = title.rstrip() | |
2948 messages = [] | |
2949 if column_width(title) > len(overline): | |
2950 blocktext = overline + '\n' + title + '\n' + underline | |
2951 if len(overline.rstrip()) < 4: | |
2952 self.short_overline(context, blocktext, lineno, 2) | |
2953 else: | |
2954 msg = self.reporter.warning( | |
2955 'Title overline too short.', | |
2956 nodes.literal_block(source, source), | |
2957 line=lineno) | |
2958 messages.append(msg) | |
2959 style = (overline[0], underline[0]) | |
2960 self.eofcheck = 0 # suppress eof()'s transition handling during the nested section parse | |
2961 self.section(title.lstrip(), source, style, lineno + 1, messages) | |
2962 self.eofcheck = 1 | |
2963 return [], 'Body', [] | |
2964 | |
2965 indent = text # indented title | |
2966 | |
2967 def underline(self, match, context, next_state): | |
2968 overline = context[0] | |
2969 blocktext = overline + '\n' + self.state_machine.line | |
2970 lineno = self.state_machine.abs_line_number() - 1 | |
2971 if len(overline.rstrip()) < 4: | |
2972 self.short_overline(context, blocktext, lineno, 1) | |
2973 msg = self.reporter.error( | |
2974 'Invalid section title or transition marker.', | |
2975 nodes.literal_block(blocktext, blocktext), | |
2976 line=lineno) | |
2977 self.parent += msg | |
2978 return [], 'Body', [] | |
2979 | |
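# When the presumed overline is shorter than four characters, it is assumed
# to be ordinary text rather than section markup: short_overline() emits an
# info message and state_correction() pushes the lines back so the Body
# state re-parses them as text.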
2980 def short_overline(self, context, blocktext, lineno, lines=1): | |
2981 msg = self.reporter.info( | |
2982 'Possible incomplete section title.\nTreating the overline as ' | |
2983 "ordinary text because it's so short.", | |
2984 line=lineno) | |
2985 self.parent += msg | |
2986 self.state_correction(context, lines) | |
2987 | |
2988 def state_correction(self, context, lines=1): | |
2989 self.state_machine.previous_line(lines) | |
2990 context[:] = [] | |
2991 raise statemachine.StateCorrection('Body', 'text') | |
2992 | |
2993 | |
2994 class QuotedLiteralBlock(RSTState): | |
2995 | |
2996 """ | |
2997 Nested parse handler for quoted (unindented) literal blocks. | |
2998 | |
2999 Special-purpose. Not for inclusion in `state_classes`. | |
3000 """ | |
3001 | |
3002 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats, | |
3003 'text': r''} | |
3004 initial_transitions = ('initial_quoted', 'text') | |
3005 | |
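# How this state works: 'initial_quoted' matches any 7-bit punctuation
# character at the start of the first line, records that character, then
# replaces itself with a 'quoted' transition that only matches lines
# beginning with the same character; anything else ends the block.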
3006 def __init__(self, state_machine, debug=False): | |
3007 RSTState.__init__(self, state_machine, debug) | |
3008 self.messages = [] | |
3009 self.initial_lineno = None | |
3010 | |
3011 def blank(self, match, context, next_state): | |
3012 if context: | |
3013 raise EOFError | |
3014 else: | |
3015 return context, next_state, [] | |
3016 | |
3017 def eof(self, context): | |
3018 if context: | |
3019 src, srcline = self.state_machine.get_source_and_line( | |
3020 self.initial_lineno) | |
3021 text = '\n'.join(context) | |
3022 literal_block = nodes.literal_block(text, text) | |
3023 literal_block.source = src | |
3024 literal_block.line = srcline | |
3025 self.parent += literal_block | |
3026 else: | |
3027 self.parent += self.reporter.warning( | |
3028 'Literal block expected; none found.', | |
3029 line=self.state_machine.abs_line_number()) | |
3030 # src not available, because statemachine.input_lines is empty | |
3031 self.state_machine.previous_line() | |
3032 self.parent += self.messages | |
3033 return [] | |
3034 | |
3035 def indent(self, match, context, next_state): | |
3036 assert context, ('QuotedLiteralBlock.indent: context should not ' | |
3037 'be empty!') | |
3038 self.messages.append( | |
3039 self.reporter.error('Unexpected indentation.', | |
3040 line=self.state_machine.abs_line_number())) | |
3041 self.state_machine.previous_line() | |
3042 raise EOFError | |
3043 | |
3044 def initial_quoted(self, match, context, next_state): | |
3045 """Match arbitrary quote character on the first line only.""" | |
3046 self.remove_transition('initial_quoted') | |
3047 quote = match.string[0] | |
3048 pattern = re.compile(re.escape(quote), re.UNICODE) | |
3049 # New transition matches consistent quotes only: | |
3050 self.add_transition('quoted', | |
3051 (pattern, self.quoted, self.__class__.__name__)) | |
3052 self.initial_lineno = self.state_machine.abs_line_number() | |
3053 return [match.string], next_state, [] | |
3054 | |
3055 def quoted(self, match, context, next_state): | |
3056 """Match consistent quotes on subsequent lines.""" | |
3057 context.append(match.string) | |
3058 return context, next_state, [] | |
3059 | |
3060 def text(self, match, context, next_state): | |
3061 if context: | |
3062 self.messages.append( | |
3063 self.reporter.error('Inconsistent literal block quoting.', | |
3064 line=self.state_machine.abs_line_number())) | |
3065 self.state_machine.previous_line() | |
3066 raise EOFError | |
3067 | |
3068 | |
3069 state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList, | |
3070 OptionList, LineBlock, ExtensionOptions, Explicit, Text, | |
3071 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List) | |
3072 """Standard set of State classes used to start `RSTStateMachine`.""" |