venv/lib/python2.7/site-packages/docutils/utils/code_analyzer.py @ 0:d67268158946 (draft)
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author: bcclaywell
date:   Mon, 12 Oct 2015 17:43:33 -0400
#!/usr/bin/python
# coding: utf-8

"""Lexical analysis of formal languages (i.e. code) using Pygments."""

# :Author: Georg Brandl; Felix Wiemann; Günter Milde
# :Date: $Date: 2011-12-20 15:14:21 +0100 (Die, 20. Dez 2011) $
# :Copyright: This module has been placed in the public domain.

from docutils import ApplicationError
try:
    import pygments
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters.html import _get_ttype_class
    with_pygments = True
except ImportError:
    with_pygments = False

# Filter the following token types from the list of class arguments:
unstyled_tokens = ['token',  # Token (base token type)
                   'text',   # Token.Text
                   '']       # short name for Token and Text
# (Add, e.g., Token.Punctuation with ``unstyled_tokens.append('punctuation')``.)
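# (With tokennames='long', for example, Token.Text yields the class list
# ['token', 'text']; both entries are filtered out here, so plain text
# carries no CSS class in the output.)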

class LexerError(ApplicationError):
    pass

class Lexer(object):
    """Parse `code` lines and yield "classified" tokens.

    Arguments

      code       -- string of source code to parse,
      language   -- formal language the code is written in,
      tokennames -- either 'long', 'short', or 'none' (see below).

    Merge subsequent tokens of the same token-type.

    Iterating over an instance yields the tokens as ``(tokentype, value)``
    tuples. The value of `tokennames` configures the naming of the tokentype:

      'long':  downcased full token type name,
      'short': short name defined by pygments.token.STANDARD_TYPES
               (= class argument used in pygments html output),
      'none':  skip lexical analysis.
    """

    def __init__(self, code, language, tokennames='short'):
        """
        Set up a lexical analyzer for `code` in `language`.
        """
        self.code = code
        self.language = language
        self.tokennames = tokennames
        self.lexer = None
        # get lexical analyzer for `language`:
        if language in ('', 'text') or tokennames == 'none':
            return
        if not with_pygments:
            raise LexerError('Cannot analyze code. '
                             'Pygments package not found.')
        try:
            self.lexer = get_lexer_by_name(self.language)
        except pygments.util.ClassNotFound:
            raise LexerError('Cannot analyze code. '
                             'No Pygments lexer found for "%s".' % language)

    # Since version 1.2 (released Jan 01, 2010) Pygments has a
    # TokenMergeFilter. However, this requires Python >= 2.4. When Docutils
    # requires the same minimal version, ``self.merge(tokens)`` in __iter__
    # can be replaced by ``self.lexer.add_filter('tokenmerge')`` in __init__.
    def merge(self, tokens):
        """Merge subsequent tokens of same token-type.

        Also strip the final newline (added by pygments).
        """
        tokens = iter(tokens)
        (lasttype, lastval) = tokens.next()
        for ttype, value in tokens:
            if ttype is lasttype:
                lastval += value
            else:
                yield (lasttype, lastval)
                (lasttype, lastval) = (ttype, value)
        if lastval.endswith('\n'):
            lastval = lastval[:-1]
        if lastval:
            yield (lasttype, lastval)
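
    # Illustrative sketch (not part of the original source): given Pygments
    # output [(Token.Name, u'x'), (Token.Text, u' '), (Token.Text, u'\n')],
    # merge() yields (Token.Name, u'x') and then (Token.Text, u' '): the
    # adjacent Text tokens are joined and the trailing newline that
    # Pygments appends is stripped.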

    def __iter__(self):
        """Parse self.code and yield "classified" tokens.
        """
        if self.lexer is None:
            yield ([], self.code)
            return
        tokens = pygments.lex(self.code, self.lexer)
        for tokentype, value in self.merge(tokens):
            if self.tokennames == 'long':  # long CSS class args
                classes = str(tokentype).lower().split('.')
            else:  # short CSS class args
                classes = [_get_ttype_class(tokentype)]
            classes = [cls for cls in classes if cls not in unstyled_tokens]
            yield (classes, value)


class NumberLines(object):
    """Insert linenumber-tokens at the start of every code line.

    Arguments

       tokens    -- iterable of ``(classes, value)`` tuples
       startline -- first line number
       endline   -- last line number

    Iterating over an instance yields the tokens with a
    ``(['ln'], '<the line number>')`` token added for every code line.
    Multi-line tokens are split."""

    def __init__(self, tokens, startline, endline):
        self.tokens = tokens
        self.startline = startline
        # pad linenumbers, e.g. endline == 100 -> fmt_str = '%3d '
        self.fmt_str = '%%%dd ' % len(str(endline))

    def __iter__(self):
        lineno = self.startline
        yield (['ln'], self.fmt_str % lineno)
        for ttype, value in self.tokens:
            lines = value.split('\n')
            for line in lines[:-1]:
                yield (ttype, line + '\n')
                lineno += 1
                yield (['ln'], self.fmt_str % lineno)
            yield (ttype, lines[-1])
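

if __name__ == '__main__':
    # Minimal usage sketch (added for illustration, not part of the
    # original module); assumes the Pygments package is installed.
    # Tokenize a two-line Python snippet with short CSS class names,
    # then prepend a line-number token to every code line.
    code = 'x = 1\nprint(x)\n'
    tokens = Lexer(code, 'python', tokennames='short')
    for classes, value in NumberLines(tokens, 1, 2):
        print (classes, value)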