Mercurial > repos > bcclaywell > argo_navis
annotate venv/lib/python2.7/sre_compile.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author | bcclaywell |
---|---|
date | Mon, 12 Oct 2015 17:43:33 -0400 |
parents | |
children |
rev | line source |
---|---|
0
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
1 # -*- coding: utf-8 -*- |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
2 # |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
3 # Secret Labs' Regular Expression Engine |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
4 # |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
5 # convert template to internal format |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
6 # |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
7 # Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
8 # |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
9 # See the sre.py file for information on usage and redistribution. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
10 # |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
11 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
12 """Internal support module for sre""" |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
13 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
14 import _sre, sys |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
15 import sre_parse |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
16 from sre_constants import * |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
17 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
18 assert _sre.MAGIC == MAGIC, "SRE module mismatch" |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
19 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
20 if _sre.CODESIZE == 2: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
21 MAXCODE = 65535 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
22 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
23 MAXCODE = 0xFFFFFFFFL |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
24 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
25 _LITERAL_CODES = set([LITERAL, NOT_LITERAL]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
26 _REPEATING_CODES = set([REPEAT, MIN_REPEAT, MAX_REPEAT]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
27 _SUCCESS_CODES = set([SUCCESS, FAILURE]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
28 _ASSERT_CODES = set([ASSERT, ASSERT_NOT]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
29 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
30 # Sets of lowercase characters which have the same uppercase. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
31 _equivalences = ( |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
32 # LATIN SMALL LETTER I, LATIN SMALL LETTER DOTLESS I |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
33 (0x69, 0x131), # iı |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
34 # LATIN SMALL LETTER S, LATIN SMALL LETTER LONG S |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
35 (0x73, 0x17f), # sſ |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
36 # MICRO SIGN, GREEK SMALL LETTER MU |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
37 (0xb5, 0x3bc), # µμ |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
38 # COMBINING GREEK YPOGEGRAMMENI, GREEK SMALL LETTER IOTA, GREEK PROSGEGRAMMENI |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
39 (0x345, 0x3b9, 0x1fbe), # \u0345ιι |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
40 # GREEK SMALL LETTER BETA, GREEK BETA SYMBOL |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
41 (0x3b2, 0x3d0), # βϐ |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
42 # GREEK SMALL LETTER EPSILON, GREEK LUNATE EPSILON SYMBOL |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
43 (0x3b5, 0x3f5), # εϵ |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
44 # GREEK SMALL LETTER THETA, GREEK THETA SYMBOL |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
45 (0x3b8, 0x3d1), # θϑ |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
46 # GREEK SMALL LETTER KAPPA, GREEK KAPPA SYMBOL |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
47 (0x3ba, 0x3f0), # κϰ |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
48 # GREEK SMALL LETTER PI, GREEK PI SYMBOL |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
49 (0x3c0, 0x3d6), # πϖ |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
50 # GREEK SMALL LETTER RHO, GREEK RHO SYMBOL |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
51 (0x3c1, 0x3f1), # ρϱ |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
52 # GREEK SMALL LETTER FINAL SIGMA, GREEK SMALL LETTER SIGMA |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
53 (0x3c2, 0x3c3), # ςσ |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
54 # GREEK SMALL LETTER PHI, GREEK PHI SYMBOL |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
55 (0x3c6, 0x3d5), # φϕ |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
56 # LATIN SMALL LETTER S WITH DOT ABOVE, LATIN SMALL LETTER LONG S WITH DOT ABOVE |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
57 (0x1e61, 0x1e9b), # ṡẛ |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
58 ) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
59 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
60 # Maps the lowercase code to lowercase codes which have the same uppercase. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
61 _ignorecase_fixes = {i: tuple(j for j in t if i != j) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
62 for t in _equivalences for i in t} |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
63 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
64 def _compile(code, pattern, flags): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
65 # internal: compile a (sub)pattern |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
66 emit = code.append |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
67 _len = len |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
68 LITERAL_CODES = _LITERAL_CODES |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
69 REPEATING_CODES = _REPEATING_CODES |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
70 SUCCESS_CODES = _SUCCESS_CODES |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
71 ASSERT_CODES = _ASSERT_CODES |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
72 if (flags & SRE_FLAG_IGNORECASE and |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
73 not (flags & SRE_FLAG_LOCALE) and |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
74 flags & SRE_FLAG_UNICODE): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
75 fixes = _ignorecase_fixes |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
76 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
77 fixes = None |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
78 for op, av in pattern: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
79 if op in LITERAL_CODES: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
80 if flags & SRE_FLAG_IGNORECASE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
81 lo = _sre.getlower(av, flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
82 if fixes and lo in fixes: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
83 emit(OPCODES[IN_IGNORE]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
84 skip = _len(code); emit(0) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
85 if op is NOT_LITERAL: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
86 emit(OPCODES[NEGATE]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
87 for k in (lo,) + fixes[lo]: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
88 emit(OPCODES[LITERAL]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
89 emit(k) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
90 emit(OPCODES[FAILURE]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
91 code[skip] = _len(code) - skip |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
92 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
93 emit(OPCODES[OP_IGNORE[op]]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
94 emit(lo) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
95 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
96 emit(OPCODES[op]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
97 emit(av) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
98 elif op is IN: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
99 if flags & SRE_FLAG_IGNORECASE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
100 emit(OPCODES[OP_IGNORE[op]]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
101 def fixup(literal, flags=flags): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
102 return _sre.getlower(literal, flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
103 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
104 emit(OPCODES[op]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
105 fixup = None |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
106 skip = _len(code); emit(0) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
107 _compile_charset(av, flags, code, fixup, fixes) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
108 code[skip] = _len(code) - skip |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
109 elif op is ANY: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
110 if flags & SRE_FLAG_DOTALL: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
111 emit(OPCODES[ANY_ALL]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
112 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
113 emit(OPCODES[ANY]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
114 elif op in REPEATING_CODES: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
115 if flags & SRE_FLAG_TEMPLATE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
116 raise error, "internal: unsupported template operator" |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
117 emit(OPCODES[REPEAT]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
118 skip = _len(code); emit(0) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
119 emit(av[0]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
120 emit(av[1]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
121 _compile(code, av[2], flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
122 emit(OPCODES[SUCCESS]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
123 code[skip] = _len(code) - skip |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
124 elif _simple(av) and op is not REPEAT: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
125 if op is MAX_REPEAT: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
126 emit(OPCODES[REPEAT_ONE]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
127 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
128 emit(OPCODES[MIN_REPEAT_ONE]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
129 skip = _len(code); emit(0) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
130 emit(av[0]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
131 emit(av[1]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
132 _compile(code, av[2], flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
133 emit(OPCODES[SUCCESS]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
134 code[skip] = _len(code) - skip |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
135 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
136 emit(OPCODES[REPEAT]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
137 skip = _len(code); emit(0) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
138 emit(av[0]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
139 emit(av[1]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
140 _compile(code, av[2], flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
141 code[skip] = _len(code) - skip |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
142 if op is MAX_REPEAT: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
143 emit(OPCODES[MAX_UNTIL]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
144 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
145 emit(OPCODES[MIN_UNTIL]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
146 elif op is SUBPATTERN: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
147 if av[0]: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
148 emit(OPCODES[MARK]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
149 emit((av[0]-1)*2) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
150 # _compile_info(code, av[1], flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
151 _compile(code, av[1], flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
152 if av[0]: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
153 emit(OPCODES[MARK]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
154 emit((av[0]-1)*2+1) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
155 elif op in SUCCESS_CODES: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
156 emit(OPCODES[op]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
157 elif op in ASSERT_CODES: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
158 emit(OPCODES[op]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
159 skip = _len(code); emit(0) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
160 if av[0] >= 0: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
161 emit(0) # look ahead |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
162 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
163 lo, hi = av[1].getwidth() |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
164 if lo != hi: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
165 raise error, "look-behind requires fixed-width pattern" |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
166 emit(lo) # look behind |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
167 _compile(code, av[1], flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
168 emit(OPCODES[SUCCESS]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
169 code[skip] = _len(code) - skip |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
170 elif op is CALL: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
171 emit(OPCODES[op]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
172 skip = _len(code); emit(0) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
173 _compile(code, av, flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
174 emit(OPCODES[SUCCESS]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
175 code[skip] = _len(code) - skip |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
176 elif op is AT: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
177 emit(OPCODES[op]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
178 if flags & SRE_FLAG_MULTILINE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
179 av = AT_MULTILINE.get(av, av) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
180 if flags & SRE_FLAG_LOCALE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
181 av = AT_LOCALE.get(av, av) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
182 elif flags & SRE_FLAG_UNICODE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
183 av = AT_UNICODE.get(av, av) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
184 emit(ATCODES[av]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
185 elif op is BRANCH: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
186 emit(OPCODES[op]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
187 tail = [] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
188 tailappend = tail.append |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
189 for av in av[1]: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
190 skip = _len(code); emit(0) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
191 # _compile_info(code, av, flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
192 _compile(code, av, flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
193 emit(OPCODES[JUMP]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
194 tailappend(_len(code)); emit(0) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
195 code[skip] = _len(code) - skip |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
196 emit(0) # end of branch |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
197 for tail in tail: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
198 code[tail] = _len(code) - tail |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
199 elif op is CATEGORY: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
200 emit(OPCODES[op]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
201 if flags & SRE_FLAG_LOCALE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
202 av = CH_LOCALE[av] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
203 elif flags & SRE_FLAG_UNICODE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
204 av = CH_UNICODE[av] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
205 emit(CHCODES[av]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
206 elif op is GROUPREF: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
207 if flags & SRE_FLAG_IGNORECASE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
208 emit(OPCODES[OP_IGNORE[op]]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
209 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
210 emit(OPCODES[op]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
211 emit(av-1) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
212 elif op is GROUPREF_EXISTS: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
213 emit(OPCODES[op]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
214 emit(av[0]-1) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
215 skipyes = _len(code); emit(0) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
216 _compile(code, av[1], flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
217 if av[2]: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
218 emit(OPCODES[JUMP]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
219 skipno = _len(code); emit(0) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
220 code[skipyes] = _len(code) - skipyes + 1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
221 _compile(code, av[2], flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
222 code[skipno] = _len(code) - skipno |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
223 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
224 code[skipyes] = _len(code) - skipyes + 1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
225 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
226 raise ValueError, ("unsupported operand type", op) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
227 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
228 def _compile_charset(charset, flags, code, fixup=None, fixes=None): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
229 # compile charset subprogram |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
230 emit = code.append |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
231 for op, av in _optimize_charset(charset, fixup, fixes, |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
232 flags & SRE_FLAG_UNICODE): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
233 emit(OPCODES[op]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
234 if op is NEGATE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
235 pass |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
236 elif op is LITERAL: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
237 emit(av) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
238 elif op is RANGE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
239 emit(av[0]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
240 emit(av[1]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
241 elif op is CHARSET: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
242 code.extend(av) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
243 elif op is BIGCHARSET: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
244 code.extend(av) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
245 elif op is CATEGORY: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
246 if flags & SRE_FLAG_LOCALE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
247 emit(CHCODES[CH_LOCALE[av]]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
248 elif flags & SRE_FLAG_UNICODE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
249 emit(CHCODES[CH_UNICODE[av]]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
250 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
251 emit(CHCODES[av]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
252 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
253 raise error, "internal: unsupported set operator" |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
254 emit(OPCODES[FAILURE]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
255 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
256 def _optimize_charset(charset, fixup, fixes, isunicode): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
257 # internal: optimize character set |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
258 out = [] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
259 tail = [] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
260 charmap = bytearray(256) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
261 for op, av in charset: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
262 while True: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
263 try: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
264 if op is LITERAL: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
265 if fixup: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
266 i = fixup(av) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
267 charmap[i] = 1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
268 if fixes and i in fixes: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
269 for k in fixes[i]: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
270 charmap[k] = 1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
271 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
272 charmap[av] = 1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
273 elif op is RANGE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
274 r = range(av[0], av[1]+1) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
275 if fixup: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
276 r = map(fixup, r) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
277 if fixup and fixes: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
278 for i in r: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
279 charmap[i] = 1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
280 if i in fixes: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
281 for k in fixes[i]: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
282 charmap[k] = 1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
283 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
284 for i in r: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
285 charmap[i] = 1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
286 elif op is NEGATE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
287 out.append((op, av)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
288 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
289 tail.append((op, av)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
290 except IndexError: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
291 if len(charmap) == 256: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
292 # character set contains non-UCS1 character codes |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
293 charmap += b'\0' * 0xff00 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
294 continue |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
295 # character set contains non-BMP character codes |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
296 if fixup and isunicode and op is RANGE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
297 lo, hi = av |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
298 ranges = [av] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
299 # There are only two ranges of cased astral characters: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
300 # 10400-1044F (Deseret) and 118A0-118DF (Warang Citi). |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
301 _fixup_range(max(0x10000, lo), min(0x11fff, hi), |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
302 ranges, fixup) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
303 for lo, hi in ranges: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
304 if lo == hi: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
305 tail.append((LITERAL, hi)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
306 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
307 tail.append((RANGE, (lo, hi))) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
308 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
309 tail.append((op, av)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
310 break |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
311 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
312 # compress character map |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
313 runs = [] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
314 q = 0 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
315 while True: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
316 p = charmap.find(b'\1', q) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
317 if p < 0: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
318 break |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
319 if len(runs) >= 2: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
320 runs = None |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
321 break |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
322 q = charmap.find(b'\0', p) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
323 if q < 0: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
324 runs.append((p, len(charmap))) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
325 break |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
326 runs.append((p, q)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
327 if runs is not None: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
328 # use literal/range |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
329 for p, q in runs: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
330 if q - p == 1: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
331 out.append((LITERAL, p)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
332 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
333 out.append((RANGE, (p, q - 1))) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
334 out += tail |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
335 # if the case was changed or new representation is more compact |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
336 if fixup or len(out) < len(charset): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
337 return out |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
338 # else original character set is good enough |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
339 return charset |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
340 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
341 # use bitmap |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
342 if len(charmap) == 256: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
343 data = _mk_bitmap(charmap) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
344 out.append((CHARSET, data)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
345 out += tail |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
346 return out |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
347 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
348 # To represent a big charset, first a bitmap of all characters in the |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
349 # set is constructed. Then, this bitmap is sliced into chunks of 256 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
350 # characters, duplicate chunks are eliminated, and each chunk is |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
351 # given a number. In the compiled expression, the charset is |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
352 # represented by a 32-bit word sequence, consisting of one word for |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
353 # the number of different chunks, a sequence of 256 bytes (64 words) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
354 # of chunk numbers indexed by their original chunk position, and a |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
355 # sequence of 256-bit chunks (8 words each). |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
356 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
357 # Compression is normally good: in a typical charset, large ranges of |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
358 # Unicode will be either completely excluded (e.g. if only cyrillic |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
359 # letters are to be matched), or completely included (e.g. if large |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
360 # subranges of Kanji match). These ranges will be represented by |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
361 # chunks of all one-bits or all zero-bits. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
362 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
363 # Matching can be also done efficiently: the more significant byte of |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
364 # the Unicode character is an index into the chunk number, and the |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
365 # less significant byte is a bit index in the chunk (just like the |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
366 # CHARSET matching). |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
367 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
368 # In UCS-4 mode, the BIGCHARSET opcode still supports only subsets |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
369 # of the basic multilingual plane; an efficient representation |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
370 # for all of Unicode has not yet been developed. |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
371 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
372 charmap = bytes(charmap) # should be hashable |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
373 comps = {} |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
374 mapping = bytearray(256) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
375 block = 0 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
376 data = bytearray() |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
377 for i in range(0, 65536, 256): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
378 chunk = charmap[i: i + 256] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
379 if chunk in comps: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
380 mapping[i // 256] = comps[chunk] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
381 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
382 mapping[i // 256] = comps[chunk] = block |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
383 block += 1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
384 data += chunk |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
385 data = _mk_bitmap(data) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
386 data[0:0] = [block] + _bytes_to_codes(mapping) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
387 out.append((BIGCHARSET, data)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
388 out += tail |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
389 return out |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
390 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
391 def _fixup_range(lo, hi, ranges, fixup): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
392 for i in map(fixup, range(lo, hi+1)): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
393 for k, (lo, hi) in enumerate(ranges): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
394 if i < lo: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
395 if l == lo - 1: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
396 ranges[k] = (i, hi) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
397 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
398 ranges.insert(k, (i, i)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
399 break |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
400 elif i > hi: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
401 if i == hi + 1: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
402 ranges[k] = (lo, i) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
403 break |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
404 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
405 break |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
406 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
407 ranges.append((i, i)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
408 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
409 _CODEBITS = _sre.CODESIZE * 8 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
410 _BITS_TRANS = b'0' + b'1' * 255 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
411 def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
412 s = bytes(bits).translate(_BITS_TRANS)[::-1] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
413 return [_int(s[i - _CODEBITS: i], 2) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
414 for i in range(len(s), 0, -_CODEBITS)] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
415 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
416 def _bytes_to_codes(b): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
417 # Convert block indices to word array |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
418 import array |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
419 if _sre.CODESIZE == 2: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
420 code = 'H' |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
421 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
422 code = 'I' |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
423 a = array.array(code, bytes(b)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
424 assert a.itemsize == _sre.CODESIZE |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
425 assert len(a) * a.itemsize == len(b) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
426 return a.tolist() |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
427 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
428 def _simple(av): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
429 # check if av is a "simple" operator |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
430 lo, hi = av[2].getwidth() |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
431 return lo == hi == 1 and av[2][0][0] != SUBPATTERN |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
432 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
433 def _compile_info(code, pattern, flags): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
434 # internal: compile an info block. in the current version, |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
435 # this contains min/max pattern width, and an optional literal |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
436 # prefix or a character map |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
437 lo, hi = pattern.getwidth() |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
438 if lo == 0: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
439 return # not worth it |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
440 # look for a literal prefix |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
441 prefix = [] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
442 prefixappend = prefix.append |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
443 prefix_skip = 0 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
444 charset = [] # not used |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
445 charsetappend = charset.append |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
446 if not (flags & SRE_FLAG_IGNORECASE): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
447 # look for literal prefix |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
448 for op, av in pattern.data: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
449 if op is LITERAL: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
450 if len(prefix) == prefix_skip: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
451 prefix_skip = prefix_skip + 1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
452 prefixappend(av) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
453 elif op is SUBPATTERN and len(av[1]) == 1: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
454 op, av = av[1][0] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
455 if op is LITERAL: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
456 prefixappend(av) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
457 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
458 break |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
459 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
460 break |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
461 # if no prefix, look for charset prefix |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
462 if not prefix and pattern.data: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
463 op, av = pattern.data[0] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
464 if op is SUBPATTERN and av[1]: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
465 op, av = av[1][0] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
466 if op is LITERAL: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
467 charsetappend((op, av)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
468 elif op is BRANCH: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
469 c = [] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
470 cappend = c.append |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
471 for p in av[1]: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
472 if not p: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
473 break |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
474 op, av = p[0] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
475 if op is LITERAL: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
476 cappend((op, av)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
477 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
478 break |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
479 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
480 charset = c |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
481 elif op is BRANCH: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
482 c = [] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
483 cappend = c.append |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
484 for p in av[1]: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
485 if not p: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
486 break |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
487 op, av = p[0] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
488 if op is LITERAL: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
489 cappend((op, av)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
490 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
491 break |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
492 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
493 charset = c |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
494 elif op is IN: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
495 charset = av |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
496 ## if prefix: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
497 ## print "*** PREFIX", prefix, prefix_skip |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
498 ## if charset: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
499 ## print "*** CHARSET", charset |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
500 # add an info block |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
501 emit = code.append |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
502 emit(OPCODES[INFO]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
503 skip = len(code); emit(0) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
504 # literal flag |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
505 mask = 0 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
506 if prefix: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
507 mask = SRE_INFO_PREFIX |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
508 if len(prefix) == prefix_skip == len(pattern.data): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
509 mask = mask + SRE_INFO_LITERAL |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
510 elif charset: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
511 mask = mask + SRE_INFO_CHARSET |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
512 emit(mask) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
513 # pattern length |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
514 if lo < MAXCODE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
515 emit(lo) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
516 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
517 emit(MAXCODE) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
518 prefix = prefix[:MAXCODE] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
519 if hi < MAXCODE: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
520 emit(hi) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
521 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
522 emit(0) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
523 # add literal prefix |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
524 if prefix: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
525 emit(len(prefix)) # length |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
526 emit(prefix_skip) # skip |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
527 code.extend(prefix) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
528 # generate overlap table |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
529 table = [-1] + ([0]*len(prefix)) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
530 for i in xrange(len(prefix)): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
531 table[i+1] = table[i]+1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
532 while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
533 table[i+1] = table[table[i+1]-1]+1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
534 code.extend(table[1:]) # don't store first entry |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
535 elif charset: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
536 _compile_charset(charset, flags, code) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
537 code[skip] = len(code) - skip |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
538 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
539 try: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
540 unicode |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
541 except NameError: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
542 STRING_TYPES = (type(""),) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
543 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
544 STRING_TYPES = (type(""), type(unicode(""))) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
545 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
546 def isstring(obj): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
547 for tp in STRING_TYPES: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
548 if isinstance(obj, tp): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
549 return 1 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
550 return 0 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
551 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
552 def _code(p, flags): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
553 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
554 flags = p.pattern.flags | flags |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
555 code = [] |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
556 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
557 # compile info block |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
558 _compile_info(code, p, flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
559 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
560 # compile the pattern |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
561 _compile(code, p.data, flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
562 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
563 code.append(OPCODES[SUCCESS]) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
564 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
565 return code |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
566 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
567 def compile(p, flags=0): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
568 # internal: convert pattern list to internal format |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
569 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
570 if isstring(p): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
571 pattern = p |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
572 p = sre_parse.parse(p, flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
573 else: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
574 pattern = None |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
575 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
576 code = _code(p, flags) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
577 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
578 # print code |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
579 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
580 # XXX: <fl> get rid of this limitation! |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
581 if p.pattern.groups > 100: |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
582 raise AssertionError( |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
583 "sorry, but this version only supports 100 named groups" |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
584 ) |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
585 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
586 # map in either direction |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
587 groupindex = p.pattern.groupdict |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
588 indexgroup = [None] * p.pattern.groups |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
589 for k, i in groupindex.items(): |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
590 indexgroup[i] = k |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
591 |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
592 return _sre.compile( |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
593 pattern, flags | p.pattern.flags, code, |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
594 p.pattern.groups-1, |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
595 groupindex, indexgroup |
d67268158946
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
bcclaywell
parents:
diff
changeset
|
596 ) |