• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3"""Pattern compiler.
4
5The grammar is taken from PatternGrammar.txt.
6
7The compiler compiles a pattern to a pytree.*Pattern instance.
8"""
9
10__author__ = 'Guido van Rossum <guido@python.org>'
11
12# Python imports
13import io
14
15# Really local imports
16from . import pygram
17from . import pytree
18# Fairly local imports
19from .pgen2 import driver
20from .pgen2 import grammar
21from .pgen2 import literals
22from .pgen2 import parse
23from .pgen2 import token
24from .pgen2 import tokenize
25
26
27class PatternSyntaxError(Exception):
28  pass
29
30
31def tokenize_wrapper(input):
32  """Tokenizes a string suppressing significant whitespace."""
33  skip = {token.NEWLINE, token.INDENT, token.DEDENT}
34  tokens = tokenize.generate_tokens(io.StringIO(input).readline)
35  for quintuple in tokens:
36    type, value, start, end, line_text = quintuple
37    if type not in skip:
38      yield quintuple
39
40
41class PatternCompiler(object):
42
43  def __init__(self, grammar_file=None):
44    """Initializer.
45
46        Takes an optional alternative filename for the pattern grammar.
47        """
48    if grammar_file is None:
49      self.grammar = pygram.pattern_grammar
50      self.syms = pygram.pattern_symbols
51    else:
52      self.grammar = driver.load_grammar(grammar_file)
53      self.syms = pygram.Symbols(self.grammar)
54    self.pygrammar = pygram.python_grammar
55    self.pysyms = pygram.python_symbols
56    self.driver = driver.Driver(self.grammar, convert=pattern_convert)
57
58  def compile_pattern(self, input, debug=False, with_tree=False):
59    """Compiles a pattern string to a nested pytree.*Pattern object."""
60    tokens = tokenize_wrapper(input)
61    try:
62      root = self.driver.parse_tokens(tokens, debug=debug)
63    except parse.ParseError as e:
64      raise PatternSyntaxError(str(e)) from None
65    if with_tree:
66      return self.compile_node(root), root
67    else:
68      return self.compile_node(root)
69
70  def compile_node(self, node):
71    """Compiles a node, recursively.
72
73        This is one big switch on the node type.
74        """
75    # XXX Optimize certain Wildcard-containing-Wildcard patterns
76    # that can be merged
77    if node.type == self.syms.Matcher:
78      node = node.children[0]  # Avoid unneeded recursion
79
80    if node.type == self.syms.Alternatives:
81      # Skip the odd children since they are just '|' tokens
82      alts = [self.compile_node(ch) for ch in node.children[::2]]
83      if len(alts) == 1:
84        return alts[0]
85      p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
86      return p.optimize()
87
88    if node.type == self.syms.Alternative:
89      units = [self.compile_node(ch) for ch in node.children]
90      if len(units) == 1:
91        return units[0]
92      p = pytree.WildcardPattern([units], min=1, max=1)
93      return p.optimize()
94
95    if node.type == self.syms.NegatedUnit:
96      pattern = self.compile_basic(node.children[1:])
97      p = pytree.NegatedPattern(pattern)
98      return p.optimize()
99
100    assert node.type == self.syms.Unit
101
102    name = None
103    nodes = node.children
104    if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
105      name = nodes[0].value
106      nodes = nodes[2:]
107    repeat = None
108    if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
109      repeat = nodes[-1]
110      nodes = nodes[:-1]
111
112    # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
113    pattern = self.compile_basic(nodes, repeat)
114
115    if repeat is not None:
116      assert repeat.type == self.syms.Repeater
117      children = repeat.children
118      child = children[0]
119      if child.type == token.STAR:
120        min = 0
121        max = pytree.HUGE
122      elif child.type == token.PLUS:
123        min = 1
124        max = pytree.HUGE
125      elif child.type == token.LBRACE:
126        assert children[-1].type == token.RBRACE
127        assert len(children) in (3, 5)
128        min = max = self.get_int(children[1])
129        if len(children) == 5:
130          max = self.get_int(children[3])
131      else:
132        assert False
133      if min != 1 or max != 1:
134        pattern = pattern.optimize()
135        pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
136
137    if name is not None:
138      pattern.name = name
139    return pattern.optimize()
140
141  def compile_basic(self, nodes, repeat=None):
142    # Compile STRING | NAME [Details] | (...) | [...]
143    assert len(nodes) >= 1
144    node = nodes[0]
145    if node.type == token.STRING:
146      value = str(literals.evalString(node.value))
147      return pytree.LeafPattern(_type_of_literal(value), value)
148    elif node.type == token.NAME:
149      value = node.value
150      if value.isupper():
151        if value not in TOKEN_MAP:
152          raise PatternSyntaxError('Invalid token: %r' % value)
153        if nodes[1:]:
154          raise PatternSyntaxError("Can't have details for token")
155        return pytree.LeafPattern(TOKEN_MAP[value])
156      else:
157        if value == 'any':
158          type = None
159        elif not value.startswith('_'):
160          type = getattr(self.pysyms, value, None)
161          if type is None:
162            raise PatternSyntaxError('Invalid symbol: %r' % value)
163        if nodes[1:]:  # Details present
164          content = [self.compile_node(nodes[1].children[1])]
165        else:
166          content = None
167        return pytree.NodePattern(type, content)
168    elif node.value == '(':
169      return self.compile_node(nodes[1])
170    elif node.value == '[':
171      assert repeat is None
172      subpattern = self.compile_node(nodes[1])
173      return pytree.WildcardPattern([[subpattern]], min=0, max=1)
174    assert False, node
175
176  def get_int(self, node):
177    assert node.type == token.NUMBER
178    return int(node.value)
179
180
181# Map named tokens to the type value for a LeafPattern
182TOKEN_MAP = {
183    'NAME': token.NAME,
184    'STRING': token.STRING,
185    'NUMBER': token.NUMBER,
186    'TOKEN': None
187}
188
189
190def _type_of_literal(value):
191  if value[0].isalpha():
192    return token.NAME
193  elif value in grammar.opmap:
194    return grammar.opmap[value]
195  else:
196    return None
197
198
199def pattern_convert(grammar, raw_node_info):
200  """Converts raw node information to a Node or Leaf instance."""
201  type, value, context, children = raw_node_info
202  if children or type in grammar.number2symbol:
203    return pytree.Node(type, children, context=context)
204  else:
205    return pytree.Leaf(type, value, context=context)
206
207
208def compile_pattern(pattern):
209  return PatternCompiler().compile_pattern(pattern)
210