1# Copyright 2006 Google, Inc. All Rights Reserved. 2# Licensed to PSF under a Contributor Agreement. 3"""Pattern compiler. 4 5The grammar is taken from PatternGrammar.txt. 6 7The compiler compiles a pattern to a pytree.*Pattern instance. 8""" 9 10__author__ = 'Guido van Rossum <guido@python.org>' 11 12# Python imports 13import io 14 15# Really local imports 16from . import pygram 17from . import pytree 18# Fairly local imports 19from .pgen2 import driver 20from .pgen2 import grammar 21from .pgen2 import literals 22from .pgen2 import parse 23from .pgen2 import token 24from .pgen2 import tokenize 25 26 27class PatternSyntaxError(Exception): 28 pass 29 30 31def tokenize_wrapper(input): 32 """Tokenizes a string suppressing significant whitespace.""" 33 skip = {token.NEWLINE, token.INDENT, token.DEDENT} 34 tokens = tokenize.generate_tokens(io.StringIO(input).readline) 35 for quintuple in tokens: 36 type, value, start, end, line_text = quintuple 37 if type not in skip: 38 yield quintuple 39 40 41class PatternCompiler(object): 42 43 def __init__(self, grammar_file=None): 44 """Initializer. 45 46 Takes an optional alternative filename for the pattern grammar. 47 """ 48 if grammar_file is None: 49 self.grammar = pygram.pattern_grammar 50 self.syms = pygram.pattern_symbols 51 else: 52 self.grammar = driver.load_grammar(grammar_file) 53 self.syms = pygram.Symbols(self.grammar) 54 self.pygrammar = pygram.python_grammar 55 self.pysyms = pygram.python_symbols 56 self.driver = driver.Driver(self.grammar, convert=pattern_convert) 57 58 def compile_pattern(self, input, debug=False, with_tree=False): 59 """Compiles a pattern string to a nested pytree.*Pattern object.""" 60 tokens = tokenize_wrapper(input) 61 try: 62 root = self.driver.parse_tokens(tokens, debug=debug) 63 except parse.ParseError as e: 64 raise PatternSyntaxError(str(e)) from None 65 if with_tree: 66 return self.compile_node(root), root 67 else: 68 return self.compile_node(root) 69 70 def compile_node(self, node): 71 """Compiles a node, recursively. 72 73 This is one big switch on the node type. 74 """ 75 # XXX Optimize certain Wildcard-containing-Wildcard patterns 76 # that can be merged 77 if node.type == self.syms.Matcher: 78 node = node.children[0] # Avoid unneeded recursion 79 80 if node.type == self.syms.Alternatives: 81 # Skip the odd children since they are just '|' tokens 82 alts = [self.compile_node(ch) for ch in node.children[::2]] 83 if len(alts) == 1: 84 return alts[0] 85 p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1) 86 return p.optimize() 87 88 if node.type == self.syms.Alternative: 89 units = [self.compile_node(ch) for ch in node.children] 90 if len(units) == 1: 91 return units[0] 92 p = pytree.WildcardPattern([units], min=1, max=1) 93 return p.optimize() 94 95 if node.type == self.syms.NegatedUnit: 96 pattern = self.compile_basic(node.children[1:]) 97 p = pytree.NegatedPattern(pattern) 98 return p.optimize() 99 100 assert node.type == self.syms.Unit 101 102 name = None 103 nodes = node.children 104 if len(nodes) >= 3 and nodes[1].type == token.EQUAL: 105 name = nodes[0].value 106 nodes = nodes[2:] 107 repeat = None 108 if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater: 109 repeat = nodes[-1] 110 nodes = nodes[:-1] 111 112 # Now we've reduced it to: STRING | NAME [Details] | (...) | [...] 113 pattern = self.compile_basic(nodes, repeat) 114 115 if repeat is not None: 116 assert repeat.type == self.syms.Repeater 117 children = repeat.children 118 child = children[0] 119 if child.type == token.STAR: 120 min = 0 121 max = pytree.HUGE 122 elif child.type == token.PLUS: 123 min = 1 124 max = pytree.HUGE 125 elif child.type == token.LBRACE: 126 assert children[-1].type == token.RBRACE 127 assert len(children) in (3, 5) 128 min = max = self.get_int(children[1]) 129 if len(children) == 5: 130 max = self.get_int(children[3]) 131 else: 132 assert False 133 if min != 1 or max != 1: 134 pattern = pattern.optimize() 135 pattern = pytree.WildcardPattern([[pattern]], min=min, max=max) 136 137 if name is not None: 138 pattern.name = name 139 return pattern.optimize() 140 141 def compile_basic(self, nodes, repeat=None): 142 # Compile STRING | NAME [Details] | (...) | [...] 143 assert len(nodes) >= 1 144 node = nodes[0] 145 if node.type == token.STRING: 146 value = str(literals.evalString(node.value)) 147 return pytree.LeafPattern(_type_of_literal(value), value) 148 elif node.type == token.NAME: 149 value = node.value 150 if value.isupper(): 151 if value not in TOKEN_MAP: 152 raise PatternSyntaxError('Invalid token: %r' % value) 153 if nodes[1:]: 154 raise PatternSyntaxError("Can't have details for token") 155 return pytree.LeafPattern(TOKEN_MAP[value]) 156 else: 157 if value == 'any': 158 type = None 159 elif not value.startswith('_'): 160 type = getattr(self.pysyms, value, None) 161 if type is None: 162 raise PatternSyntaxError('Invalid symbol: %r' % value) 163 if nodes[1:]: # Details present 164 content = [self.compile_node(nodes[1].children[1])] 165 else: 166 content = None 167 return pytree.NodePattern(type, content) 168 elif node.value == '(': 169 return self.compile_node(nodes[1]) 170 elif node.value == '[': 171 assert repeat is None 172 subpattern = self.compile_node(nodes[1]) 173 return pytree.WildcardPattern([[subpattern]], min=0, max=1) 174 assert False, node 175 176 def get_int(self, node): 177 assert node.type == token.NUMBER 178 return int(node.value) 179 180 181# Map named tokens to the type value for a LeafPattern 182TOKEN_MAP = { 183 'NAME': token.NAME, 184 'STRING': token.STRING, 185 'NUMBER': token.NUMBER, 186 'TOKEN': None 187} 188 189 190def _type_of_literal(value): 191 if value[0].isalpha(): 192 return token.NAME 193 elif value in grammar.opmap: 194 return grammar.opmap[value] 195 else: 196 return None 197 198 199def pattern_convert(grammar, raw_node_info): 200 """Converts raw node information to a Node or Leaf instance.""" 201 type, value, context, children = raw_node_info 202 if children or type in grammar.number2symbol: 203 return pytree.Node(type, children, context=context) 204 else: 205 return pytree.Leaf(type, value, context=context) 206 207 208def compile_pattern(pattern): 209 return PatternCompiler().compile_pattern(pattern) 210