Mercurial > repos > bcclaywell > argo_navis
comparison venv/lib/python2.7/re.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author | bcclaywell |
---|---|
date | Mon, 12 Oct 2015 17:43:33 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d67268158946 |
---|---|
1 # | |
2 # Secret Labs' Regular Expression Engine | |
3 # | |
4 # re-compatible interface for the sre matching engine | |
5 # | |
6 # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. | |
7 # | |
8 # This version of the SRE library can be redistributed under CNRI's | |
9 # Python 1.6 license. For any other use, please contact Secret Labs | |
10 # AB (info@pythonware.com). | |
11 # | |
12 # Portions of this engine have been developed in cooperation with | |
13 # CNRI. Hewlett-Packard provided funding for 1.6 integration and | |
14 # other compatibility work. | |
15 # | |
16 | |
17 r"""Support for regular expressions (RE). | |
18 | |
19 This module provides regular expression matching operations similar to | |
20 those found in Perl. It supports both 8-bit and Unicode strings; both | |
21 the pattern and the strings being processed can contain null bytes and | |
22 characters outside the US ASCII range. | |
23 | |
24 Regular expressions can contain both special and ordinary characters. | |
25 Most ordinary characters, like "A", "a", or "0", are the simplest | |
26 regular expressions; they simply match themselves. You can | |
27 concatenate ordinary characters, so last matches the string 'last'. | |
28 | |
29 The special characters are: | |
30 "." Matches any character except a newline. | |
31 "^" Matches the start of the string. | |
32 "$" Matches the end of the string or just before the newline at | |
33 the end of the string. | |
34 "*" Matches 0 or more (greedy) repetitions of the preceding RE. | |
35 Greedy means that it will match as many repetitions as possible. | |
36 "+" Matches 1 or more (greedy) repetitions of the preceding RE. | |
37 "?" Matches 0 or 1 (greedy) of the preceding RE. | |
38 *?,+?,?? Non-greedy versions of the previous three special characters. | |
39 {m,n} Matches from m to n repetitions of the preceding RE. | |
40 {m,n}? Non-greedy version of the above. | |
41 "\\" Either escapes special characters or signals a special sequence. | |
42 [] Indicates a set of characters. | |
43 A "^" as the first character indicates a complementing set. | |
44 "|" A|B, creates an RE that will match either A or B. | |
45 (...) Matches the RE inside the parentheses. | |
46 The contents can be retrieved or matched later in the string. | |
47 (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below). | |
48 (?:...) Non-grouping version of regular parentheses. | |
49 (?P<name>...) The substring matched by the group is accessible by name. | |
50 (?P=name) Matches the text matched earlier by the group named name. | |
51 (?#...) A comment; ignored. | |
52 (?=...) Matches if ... matches next, but doesn't consume the string. | |
53 (?!...) Matches if ... doesn't match next. | |
54 (?<=...) Matches if preceded by ... (must be fixed length). | |
55 (?<!...) Matches if not preceded by ... (must be fixed length). | |
56 (?(id/name)yes|no) Matches yes pattern if the group with id/name matched, | |
57 the (optional) no pattern otherwise. | |
58 | |
59 The special sequences consist of "\\" and a character from the list | |
60 below. If the ordinary character is not on the list, then the | |
61 resulting RE will match the second character. | |
62 \number Matches the contents of the group of the same number. | |
63 \A Matches only at the start of the string. | |
64 \Z Matches only at the end of the string. | |
65 \b Matches the empty string, but only at the start or end of a word. | |
66 \B Matches the empty string, but not at the start or end of a word. | |
67 \d Matches any decimal digit; equivalent to the set [0-9]. | |
68 \D Matches any non-digit character; equivalent to the set [^0-9]. | |
69 \s Matches any whitespace character; equivalent to [ \t\n\r\f\v]. | |
70 \S Matches any non-whitespace character; equiv. to [^ \t\n\r\f\v]. | |
71 \w Matches any alphanumeric character; equivalent to [a-zA-Z0-9_]. | |
72 With LOCALE, it will match the set [0-9_] plus characters defined | |
73 as letters for the current locale. | |
74 \W Matches the complement of \w. | |
75 \\ Matches a literal backslash. | |
76 | |
77 This module exports the following functions: | |
78 match Match a regular expression pattern to the beginning of a string. | |
79 search Search a string for the presence of a pattern. | |
80 sub Substitute occurrences of a pattern found in a string. | |
81 subn Same as sub, but also return the number of substitutions made. | |
82 split Split a string by the occurrences of a pattern. | |
83 findall Find all occurrences of a pattern in a string. | |
84 finditer Return an iterator yielding a match object for each match. | |
85 compile Compile a pattern into a RegexObject. | |
86 purge Clear the regular expression cache. | |
87 escape Backslash all non-alphanumerics in a string. | |
88 | |
89 Some of the functions in this module take flags as optional parameters: | |
90 I IGNORECASE Perform case-insensitive matching. | |
91 L LOCALE Make \w, \W, \b, \B, dependent on the current locale. | |
92 M MULTILINE "^" matches the beginning of lines (after a newline) | |
93 as well as the string. | |
94 "$" matches the end of lines (before a newline) as well | |
95 as the end of the string. | |
96 S DOTALL "." matches any character at all, including the newline. | |
97 X VERBOSE Ignore whitespace and comments for nicer looking RE's. | |
98 U UNICODE Make \w, \W, \b, \B, dependent on the Unicode locale. | |
99 | |
100 This module also defines an exception 'error'. | |
101 | |
102 """ | |
103 | |
104 import sys | |
105 import sre_compile | |
106 import sre_parse | |
107 try: | |
108 import _locale | |
109 except ImportError: | |
110 _locale = None | |
111 | |
# public symbols
# ("finditer" is not listed here; it is appended further down, next to
# its definition.)
__all__ = [
    # functions
    "match", "search", "sub", "subn", "split", "findall",
    "compile", "purge", "template", "escape",
    # one-letter flag names
    "I", "L", "M", "S", "X", "U",
    # long flag names
    "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", "UNICODE",
    # exception
    "error",
]

__version__ = "2.2.1"
119 | |
# flags: every long name is bound first, then its one-letter alias
IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE  # ignore case
I = IGNORECASE
LOCALE = sre_compile.SRE_FLAG_LOCALE  # assume current 8-bit locale
L = LOCALE
UNICODE = sre_compile.SRE_FLAG_UNICODE  # assume unicode locale
U = UNICODE
MULTILINE = sre_compile.SRE_FLAG_MULTILINE  # make anchors look for newline
M = MULTILINE
DOTALL = sre_compile.SRE_FLAG_DOTALL  # make dot match newline
S = DOTALL
VERBOSE = sre_compile.SRE_FLAG_VERBOSE  # ignore whitespace and comments
X = VERBOSE

# sre extensions (experimental, don't rely on these)
TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE  # disable backtracking
T = TEMPLATE
DEBUG = sre_compile.SRE_FLAG_DEBUG  # dump pattern after compilation

# sre exception
error = sre_compile.error
134 | |
135 # -------------------------------------------------------------------- | |
136 # public interface | |
137 | |
def match(pattern, string, flags=0):
    """Match pattern against the beginning of string.

    The pattern is compiled (or fetched from the cache) with the given
    flags; the result is a match object, or None if there is no match."""
    compiled = _compile(pattern, flags)
    return compiled.match(string)
142 | |
def search(pattern, string, flags=0):
    """Look through string for a place where pattern matches.

    The pattern is compiled (or fetched from the cache) with the given
    flags; the result is a match object, or None if there is no match."""
    compiled = _compile(pattern, flags)
    return compiled.search(string)
147 | |
def sub(pattern, repl, string, count=0, flags=0):
    """Replace the leftmost non-overlapping occurrences of pattern in
    string with repl and return the resulting string.

    repl is either a string, in which case backslash escapes in it are
    processed, or a callable, in which case it receives the match object
    and must return the replacement string.  count is forwarded to the
    compiled pattern's sub() method."""
    compiled = _compile(pattern, flags)
    return compiled.sub(repl, string, count)
156 | |
def subn(pattern, repl, string, count=0, flags=0):
    """Like sub(), but return a 2-tuple (new_string, number).

    new_string is the source string with the leftmost non-overlapping
    occurrences of pattern replaced by repl; number is how many
    substitutions were made.  repl is either a string, in which case
    backslash escapes in it are processed, or a callable, in which case
    it receives the match object and must return the replacement
    string."""
    compiled = _compile(pattern, flags)
    return compiled.subn(repl, string, count)
167 | |
def split(pattern, string, maxsplit=0, flags=0):
    """Split string at the occurrences of pattern and return the list
    of resulting substrings.

    maxsplit is forwarded to the compiled pattern's split() method."""
    compiled = _compile(pattern, flags)
    return compiled.split(string, maxsplit)
172 | |
def findall(pattern, string, flags=0):
    """Return a list of every non-overlapping match of pattern in string.

    When the pattern contains one or more groups, the list holds the
    groups instead; with more than one group each item is a tuple.

    Empty matches are part of the result."""
    compiled = _compile(pattern, flags)
    return compiled.findall(string)
182 | |
# finditer is only defined (and exported) on interpreters whose
# sys.hexversion is at least 0x02020000.
if sys.hexversion >= 0x02020000:
    __all__.append("finditer")

    def finditer(pattern, string, flags=0):
        """Return an iterator over every non-overlapping match of
        pattern in string; each item produced is a match object.

        Empty matches are part of the result."""
        compiled = _compile(pattern, flags)
        return compiled.finditer(string)
191 | |
def compile(pattern, flags=0):
    """Compile a regular expression pattern and return the pattern object."""
    compiled = _compile(pattern, flags)
    return compiled
195 | |
def purge():
    """Empty both the compiled-pattern cache and the replacement cache."""
    for cache in (_cache, _cache_repl):
        cache.clear()
200 | |
def template(pattern, flags=0):
    """Compile pattern with the T (template) flag added to flags and
    return the pattern object."""
    template_flags = flags | T
    return _compile(pattern, template_flags)
204 | |
# Characters that escape() leaves untouched: ASCII letters and digits.
_alphanum = frozenset(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")

def escape(pattern):
    """Return pattern with every non-alphanumeric character backslashed.

    Characters found in _alphanum are copied unchanged.  A NUL character
    becomes the four characters backslash-0-0-0; any other character is
    prefixed with a single backslash.  The pieces are joined with an
    empty slice of pattern, so the result has the same string type as
    the argument."""
    safe = _alphanum
    pieces = []
    for ch in pattern:
        if ch in safe:
            pieces.append(ch)
        elif ch == "\000":
            pieces.append("\\000")
        else:
            pieces.append("\\" + ch)
    return pattern[:0].join(pieces)
219 | |
220 # -------------------------------------------------------------------- | |
221 # internals | |
222 | |
# Maximum number of entries per cache; _compile and _compile_repl clear
# a cache that has reached this size before inserting into it.
_MAXCACHE = 100

# Compiled patterns, keyed by (type(pattern), pattern, flags); each value
# is a (compiled pattern, locale-or-None) pair.
_cache = {}
# Parsed replacement templates, keyed by (repl, pattern).
_cache_repl = {}

# Type of compiled pattern objects, obtained by compiling an empty pattern.
_pattern_type = type(sre_compile.compile("", 0))
229 | |
def _compile(*key):
    """Internal: return the compiled pattern for key = (pattern, flags).

    Results are cached in _cache under (type(pattern), pattern, flags)
    unless the DEBUG flag is set.  A cached entry is reused when it was
    stored without a locale, or when the stored locale equals the
    current LC_CTYPE locale.

    An already compiled pattern is returned unchanged; combining one
    with non-zero flags raises ValueError.  A pattern that is neither a
    string nor a compiled pattern raises TypeError.  Compilation
    failures surface as error.
    """
    pattern, flags = key
    bypass_cache = flags & DEBUG
    if not bypass_cache:
        cachekey = (type(key[0]),) + key
        # Only the dictionary lookup is expected to raise KeyError, so it
        # is the only statement inside the try body.
        try:
            p, loc = _cache[cachekey]
        except KeyError:
            pass
        else:
            if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE):
                return p
    if isinstance(pattern, _pattern_type):
        if flags:
            raise ValueError('Cannot process flags argument with a compiled pattern')
        return pattern
    if not sre_compile.isstring(pattern):
        raise TypeError("first argument must be string or compiled pattern")
    try:
        p = sre_compile.compile(pattern, flags)
    except error as exc:
        # invalid expression: re-raise the same exception object from here
        raise exc
    if not bypass_cache:
        # The cache is emptied wholesale once it reaches _MAXCACHE entries.
        if len(_cache) >= _MAXCACHE:
            _cache.clear()
        if p.flags & LOCALE:
            if not _locale:
                # Without the _locale module the current locale cannot be
                # recorded, so locale-dependent patterns are not cached.
                return p
            loc = _locale.setlocale(_locale.LC_CTYPE)
        else:
            loc = None
        _cache[cachekey] = p, loc
    return p
263 | |
def _compile_repl(*key):
    """Internal: return the parsed replacement template for
    key = (repl, pattern).

    Parsed templates are cached in _cache_repl under the key tuple; the
    cache is emptied wholesale once it reaches _MAXCACHE entries.
    Failures from sre_parse.parse_template surface as error.
    """
    p = _cache_repl.get(key)
    if p is not None:
        return p
    repl, pattern = key
    try:
        p = sre_parse.parse_template(repl, pattern)
    except error as exc:
        # invalid expression: re-raise the same exception object from here
        raise exc
    if len(_cache_repl) >= _MAXCACHE:
        _cache_repl.clear()
    _cache_repl[key] = p
    return p
278 | |
def _expand(pattern, match, template):
    """Internal: implementation hook for match.expand.

    Parses template against pattern (bypassing the replacement cache)
    and expands the parsed form with the given match object."""
    parsed = sre_parse.parse_template(template, pattern)
    return sre_parse.expand_template(parsed, match)
283 | |
def _subx(pattern, template):
    """Internal: helper for pattern.sub/subn.

    Returns either the single literal replacement, or a one-argument
    callable that expands the compiled template for a given match."""
    template = _compile_repl(template, pattern)
    # presumably template is the (groups, literals) pair built by
    # sre_parse.parse_template -- verify against sre_parse
    groups = template[0]
    literals = template[1]
    if not groups and len(literals) == 1:
        # literal replacement
        return literals[0]

    def filter(match, template=template):
        return sre_parse.expand_template(template, match)

    return filter
293 | |
294 # register myself for pickling | |
295 | |
296 import copy_reg | |
297 | |
def _pickle(p):
    """Reduce compiled pattern p to (_compile, (pattern, flags)), so that
    unpickling rebuilds it by calling _compile with those arguments."""
    rebuild_args = (p.pattern, p.flags)
    return _compile, rebuild_args

# Register the reducer for compiled pattern objects with copy_reg.
copy_reg.pickle(_pattern_type, _pickle, _compile)
302 | |
303 # -------------------------------------------------------------------- | |
304 # experimental stuff (see python-dev discussions for details) | |
305 | |
class Scanner:
    """Experimental tokenizer built from a lexicon of (phrase, action) pairs.

    All phrases are combined into one compound pattern in which each
    phrase is its own numbered group; scan() uses the number of the
    group that matched to pick the corresponding action.
    """

    def __init__(self, lexicon, flags=0):
        """Parse every phrase in lexicon with flags and compile the
        alternation of all of them into self.scanner."""
        from sre_constants import BRANCH, SUBPATTERN
        self.lexicon = lexicon
        # shared parser state for the compound pattern
        state = sre_parse.Pattern()
        state.flags = flags
        # one sub-pattern per phrase, numbered from 1 in lexicon order
        branches = []
        for phrase, action in lexicon:
            branches.append(sre_parse.SubPattern(state, [
                (SUBPATTERN, (len(branches) + 1,
                              sre_parse.parse(phrase, flags))),
                ]))
        state.groups = len(branches) + 1
        compound = sre_parse.SubPattern(state, [(BRANCH, (None, branches))])
        self.scanner = sre_compile.compile(compound)

    def scan(self, string):
        """Tokenize string and return (results, remainder).

        Matching proceeds from the start of string until no phrase
        matches or a match ends where it started.  For each match the
        action of the matching phrase is looked up: a callable action is
        invoked as action(self, matched_text), with the match object
        stored in self.match first, and its return value takes the
        action's place.  Any value that is not None is appended to
        results.  remainder is the part of string after the last
        processed match.
        """
        tokens = []
        next_match = self.scanner.scanner(string).match
        pos = 0
        while True:
            m = next_match()
            if not m:
                break
            end = m.end()
            if end == pos:
                # zero-width match: stop instead of looping forever
                break
            action = self.lexicon[m.lastindex - 1][1]
            if hasattr(action, '__call__'):
                self.match = m
                action = action(self, m.group())
            if action is not None:
                tokens.append(action)
            pos = end
        return tokens, string[pos:]