1import os.path 2import token 3from typing import Any, Dict, Optional, IO, Text, Tuple 4 5from pegen.grammar import ( 6 Cut, 7 GrammarVisitor, 8 NameLeaf, 9 StringLeaf, 10 Rhs, 11 NamedItem, 12 Lookahead, 13 PositiveLookahead, 14 NegativeLookahead, 15 Opt, 16 Repeat0, 17 Repeat1, 18 Gather, 19 Group, 20 Rule, 21 Alt, 22) 23from pegen import grammar 24from pegen.parser_generator import ParserGenerator 25 26MODULE_PREFIX = """\ 27#!/usr/bin/env python3.8 28# @generated by pegen from {filename} 29 30import ast 31from typing import Optional, Any 32 33from pegen.parser import memoize, memoize_left_rec, logger, Parser 34 35""" 36MODULE_SUFFIX = """ 37 38if __name__ == '__main__': 39 from pegen.parser import simple_parser_main 40 simple_parser_main(GeneratedParser) 41""" 42 43 44class PythonCallMakerVisitor(GrammarVisitor): 45 def __init__(self, parser_generator: ParserGenerator): 46 self.gen = parser_generator 47 self.cache: Dict[Any, Any] = {} 48 49 def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: 50 name = node.value 51 if name in ("NAME", "NUMBER", "STRING", "OP"): 52 name = name.lower() 53 return name, f"self.{name}()" 54 if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"): 55 return name.lower(), f"self.expect({name!r})" 56 return name, f"self.{name}()" 57 58 def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]: 59 return "literal", f"self.expect({node.value})" 60 61 def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]: 62 if node in self.cache: 63 return self.cache[node] 64 if len(node.alts) == 1 and len(node.alts[0].items) == 1: 65 self.cache[node] = self.visit(node.alts[0].items[0]) 66 else: 67 name = self.gen.name_node(node) 68 self.cache[node] = name, f"self.{name}()" 69 return self.cache[node] 70 71 def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]: 72 name, call = self.visit(node.item) 73 if node.name: 74 name = node.name 75 return name, call 76 77 def lookahead_call_helper(self, node: Lookahead) -> Tuple[str, str]: 78 name, call = self.visit(node.node) 79 head, tail = call.split("(", 1) 80 assert tail[-1] == ")" 81 tail = tail[:-1] 82 return head, tail 83 84 def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]: 85 head, tail = self.lookahead_call_helper(node) 86 return None, f"self.positive_lookahead({head}, {tail})" 87 88 def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]: 89 head, tail = self.lookahead_call_helper(node) 90 return None, f"self.negative_lookahead({head}, {tail})" 91 92 def visit_Opt(self, node: Opt) -> Tuple[str, str]: 93 name, call = self.visit(node.node) 94 # Note trailing comma (the call may already have one comma 95 # at the end, for example when rules have both repeat0 and optional 96 # markers, e.g: [rule*]) 97 if call.endswith(","): 98 return "opt", call 99 else: 100 return "opt", f"{call}," 101 102 def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]: 103 if node in self.cache: 104 return self.cache[node] 105 name = self.gen.name_loop(node.node, False) 106 self.cache[node] = name, f"self.{name}()," # Also a trailing comma! 107 return self.cache[node] 108 109 def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]: 110 if node in self.cache: 111 return self.cache[node] 112 name = self.gen.name_loop(node.node, True) 113 self.cache[node] = name, f"self.{name}()" # But no trailing comma here! 114 return self.cache[node] 115 116 def visit_Gather(self, node: Gather) -> Tuple[str, str]: 117 if node in self.cache: 118 return self.cache[node] 119 name = self.gen.name_gather(node) 120 self.cache[node] = name, f"self.{name}()" # No trailing comma here either! 121 return self.cache[node] 122 123 def visit_Group(self, node: Group) -> Tuple[Optional[str], str]: 124 return self.visit(node.rhs) 125 126 def visit_Cut(self, node: Cut) -> Tuple[str, str]: 127 return "cut", "True" 128 129 130class PythonParserGenerator(ParserGenerator, GrammarVisitor): 131 def __init__( 132 self, 133 grammar: grammar.Grammar, 134 file: Optional[IO[Text]], 135 tokens: Dict[int, str] = token.tok_name, 136 ): 137 super().__init__(grammar, tokens, file) 138 self.callmakervisitor = PythonCallMakerVisitor(self) 139 140 def generate(self, filename: str) -> None: 141 header = self.grammar.metas.get("header", MODULE_PREFIX) 142 if header is not None: 143 basename = os.path.basename(filename) 144 self.print(header.rstrip("\n").format(filename=basename)) 145 subheader = self.grammar.metas.get("subheader", "") 146 if subheader: 147 self.print(subheader.format(filename=filename)) 148 self.print("class GeneratedParser(Parser):") 149 while self.todo: 150 for rulename, rule in list(self.todo.items()): 151 del self.todo[rulename] 152 self.print() 153 with self.indent(): 154 self.visit(rule) 155 trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX) 156 if trailer is not None: 157 self.print(trailer.rstrip("\n")) 158 159 def visit_Rule(self, node: Rule) -> None: 160 is_loop = node.is_loop() 161 is_gather = node.is_gather() 162 rhs = node.flatten() 163 if node.left_recursive: 164 if node.leader: 165 self.print("@memoize_left_rec") 166 else: 167 # Non-leader rules in a cycle are not memoized, 168 # but they must still be logged. 169 self.print("@logger") 170 else: 171 self.print("@memoize") 172 node_type = node.type or "Any" 173 self.print(f"def {node.name}(self) -> Optional[{node_type}]:") 174 with self.indent(): 175 self.print(f"# {node.name}: {rhs}") 176 if node.nullable: 177 self.print(f"# nullable={node.nullable}") 178 self.print("mark = self.mark()") 179 if is_loop: 180 self.print("children = []") 181 self.visit(rhs, is_loop=is_loop, is_gather=is_gather) 182 if is_loop: 183 self.print("return children") 184 else: 185 self.print("return None") 186 187 def visit_NamedItem(self, node: NamedItem) -> None: 188 name, call = self.callmakervisitor.visit(node.item) 189 if node.name: 190 name = node.name 191 if not name: 192 self.print(call) 193 else: 194 if name != "cut": 195 name = self.dedupe(name) 196 self.print(f"({name} := {call})") 197 198 def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None: 199 if is_loop: 200 assert len(node.alts) == 1 201 for alt in node.alts: 202 self.visit(alt, is_loop=is_loop, is_gather=is_gather) 203 204 def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None: 205 with self.local_variable_context(): 206 self.print("cut = False") # TODO: Only if needed. 207 if is_loop: 208 self.print("while (") 209 else: 210 self.print("if (") 211 with self.indent(): 212 first = True 213 for item in node.items: 214 if first: 215 first = False 216 else: 217 self.print("and") 218 self.visit(item) 219 if is_gather: 220 self.print("is not None") 221 222 self.print("):") 223 with self.indent(): 224 action = node.action 225 if not action: 226 if is_gather: 227 assert len(self.local_variable_names) == 2 228 action = ( 229 f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}" 230 ) 231 else: 232 action = f"[{', '.join(self.local_variable_names)}]" 233 if is_loop: 234 self.print(f"children.append({action})") 235 self.print(f"mark = self.mark()") 236 else: 237 self.print(f"return {action}") 238 self.print("self.reset(mark)") 239 # Skip remaining alternatives if a cut was reached. 240 self.print("if cut: return None") # TODO: Only if needed. 241