"""C parser generator for pegen.

Transforms a pegen ``Grammar`` into C source: one ``<rule>_rule()``
function per grammar rule, keyword/soft-keyword lookup tables, and the
``_PyPegen_parse`` entry point (see ``EXTENSION_SUFFIX``).
"""

import ast
import os.path
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, IO, List, Optional, Set, Text, Tuple

from pegen import grammar
from pegen.grammar import (
    Alt,
    Cut,
    Forced,
    Gather,
    GrammarVisitor,
    Group,
    Lookahead,
    NamedItem,
    NameLeaf,
    NegativeLookahead,
    Opt,
    PositiveLookahead,
    Repeat0,
    Repeat1,
    Rhs,
    Rule,
    StringLeaf,
)
from pegen.parser_generator import ParserGenerator


EXTENSION_PREFIX = """\
#include "pegen.h"

#if defined(Py_DEBUG) && defined(Py_BUILD_CORE)
# define D(x) if (Py_DebugFlag) x;
#else
# define D(x)
#endif

# define MAXSTACK 6000

"""


EXTENSION_SUFFIX = """
void *
_PyPegen_parse(Parser *p)
{
    // Initialize keywords
    p->keywords = reserved_keywords;
    p->n_keyword_lists = n_keyword_lists;
    p->soft_keywords = soft_keywords;

    return start_rule(p);
}
"""


class NodeTypes(Enum):
    """Kind of atomic parser call; selects the lookahead helper to emit."""

    NAME_TOKEN = 0
    NUMBER_TOKEN = 1
    STRING_TOKEN = 2
    GENERIC_TOKEN = 3
    KEYWORD = 4
    SOFT_KEYWORD = 5
    CUT_OPERATOR = 6


# Non-exact token names with a dedicated _PyPegen_<name>_token() helper.
BASE_NODETYPES = {
    "NAME": NodeTypes.NAME_TOKEN,
    "NUMBER": NodeTypes.NUMBER_TOKEN,
    "STRING": NodeTypes.STRING_TOKEN,
    "SOFT_KEYWORD": NodeTypes.SOFT_KEYWORD,
}


@dataclass
class FunctionCall:
    """One C call expression emitted inside a generated rule body.

    ``__str__`` renders the call, optionally wrapped in an assignment
    (with an optional cast) and followed by a trailing ``//`` comment.
    """

    function: str
    arguments: List[Any] = field(default_factory=list)
    assigned_variable: Optional[str] = None
    assigned_variable_type: Optional[str] = None
    return_type: Optional[str] = None
    nodetype: Optional[NodeTypes] = None
    # When set, ", !p->error_indicator" is appended so the whole
    # expression is truthy unless a real error occurred (used for Opt).
    force_true: bool = False
    comment: Optional[str] = None

    def __str__(self) -> str:
        parts = []
        parts.append(self.function)
        if self.arguments:
            parts.append(f"({', '.join(map(str, self.arguments))})")
        if self.force_true:
            parts.append(", !p->error_indicator")
        if self.assigned_variable:
            if self.assigned_variable_type:
                parts = [
                    "(",
                    self.assigned_variable,
                    " = ",
                    "(",
                    self.assigned_variable_type,
                    ")",
                    *parts,
                    ")",
                ]
            else:
                parts = ["(", self.assigned_variable, " = ", *parts, ")"]
        if self.comment:
            parts.append(f" // {self.comment}")
        return "".join(parts)


class CCallMakerVisitor(GrammarVisitor):
    """Maps each grammar node to the ``FunctionCall`` that parses it."""

    def __init__(
        self,
        parser_generator: ParserGenerator,
        exact_tokens: Dict[str, int],
        non_exact_tokens: Set[str],
    ):
        self.gen = parser_generator
        self.exact_tokens = exact_tokens
        self.non_exact_tokens = non_exact_tokens
        self.cache: Dict[Any, FunctionCall] = {}
        self.keyword_cache: Dict[str, int] = {}
        self.soft_keywords: Set[str] = set()
        self.cleanup_statements: List[str] = []

    def keyword_helper(self, keyword: str) -> FunctionCall:
        """Return a call expecting a hard keyword, assigning it a token type."""
        if keyword not in self.keyword_cache:
            self.keyword_cache[keyword] = self.gen.keyword_type()
        return FunctionCall(
            assigned_variable="_keyword",
            function="_PyPegen_expect_token",
            arguments=["p", self.keyword_cache[keyword]],
            return_type="Token *",
            nodetype=NodeTypes.KEYWORD,
            comment=f"token='{keyword}'",
        )

    def soft_keyword_helper(self, value: str) -> FunctionCall:
        """Return a call expecting a soft keyword (matched by string value)."""
        self.soft_keywords.add(value.replace('"', ""))
        return FunctionCall(
            assigned_variable="_keyword",
            function="_PyPegen_expect_soft_keyword",
            arguments=["p", value],
            return_type="expr_ty",
            nodetype=NodeTypes.SOFT_KEYWORD,
            comment=f"soft_keyword='{value}'",
        )

    def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
        """Emit a call for a bare name: a token helper or a rule invocation."""
        name = node.value
        if name in self.non_exact_tokens:
            if name in BASE_NODETYPES:
                return FunctionCall(
                    assigned_variable=f"{name.lower()}_var",
                    function=f"_PyPegen_{name.lower()}_token",
                    arguments=["p"],
                    nodetype=BASE_NODETYPES[name],
                    return_type="expr_ty",
                    comment=name,
                )
            return FunctionCall(
                assigned_variable=f"{name.lower()}_var",
                function="_PyPegen_expect_token",
                arguments=["p", name],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
                comment=f"token='{name}'",
            )

        # Otherwise the name refers to another rule; loops/gathers always
        # produce an asdl_seq regardless of the rule's declared type.
        res_type = None
        rule = self.gen.all_rules.get(name.lower())
        if rule is not None:
            res_type = "asdl_seq *" if rule.is_loop() or rule.is_gather() else rule.type

        return FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type=res_type,
            comment=f"{node}",
        )

    def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
        """Emit a call for a string literal: keyword, soft keyword, or exact token."""
        val = ast.literal_eval(node.value)
        if re.match(r"[a-zA-Z_]\w*\Z", val):  # This is a keyword
            # Single-quoted literals are hard keywords; double-quoted are soft.
            if node.value.endswith("'"):
                return self.keyword_helper(val)
            else:
                return self.soft_keyword_helper(node.value)
        else:
            assert val in self.exact_tokens, f"{node.value} is not a known literal"
            tok_type = self.exact_tokens[val]
            return FunctionCall(
                assigned_variable="_literal",
                function="_PyPegen_expect_token",
                arguments=["p", tok_type],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
                comment=f"token='{val}'",
            )

    def visit_Rhs(self, node: Rhs) -> FunctionCall:
        """Emit a call for an alternative list, inlining trivial single-item alts."""

        def can_we_inline(node: Rhs) -> bool:
            # Fixed annotation: this predicate returns a bool, not an int.
            if len(node.alts) != 1 or len(node.alts[0].items) != 1:
                return False
            # If the alternative has an action we cannot inline
            if getattr(node.alts[0], "action", None) is not None:
                return False
            return True

        if node in self.cache:
            return self.cache[node]
        if can_we_inline(node):
            self.cache[node] = self.generate_call(node.alts[0].items[0])
        else:
            name = self.gen.name_node(node)
            self.cache[node] = FunctionCall(
                assigned_variable=f"{name}_var",
                function=f"{name}_rule",
                arguments=["p"],
                comment=f"{node}",
            )
        return self.cache[node]

    def visit_NamedItem(self, node: NamedItem) -> FunctionCall:
        """Emit the item's call, overriding the target variable/type if named."""
        call = self.generate_call(node.item)
        if node.name:
            call.assigned_variable = node.name
        if node.type:
            call.assigned_variable_type = node.type
        return call

    def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall:
        """Wrap the inner call in the lookahead helper matching its node type."""
        call = self.generate_call(node.node)
        if call.nodetype == NodeTypes.NAME_TOKEN:
            return FunctionCall(
                function="_PyPegen_lookahead_with_name",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
            )
        elif call.nodetype == NodeTypes.SOFT_KEYWORD:
            return FunctionCall(
                function="_PyPegen_lookahead_with_string",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
            )
        elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
            return FunctionCall(
                function="_PyPegen_lookahead_with_int",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
                comment=f"token={node.node}",
            )
        else:
            return FunctionCall(
                function="_PyPegen_lookahead",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
            )

    def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall:
        return self.lookahead_call_helper(node, 1)

    def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall:
        return self.lookahead_call_helper(node, 0)

    def visit_Forced(self, node: Forced) -> FunctionCall:
        """Emit a call for ``&&item``: token must be present or it's a syntax error."""
        call = self.generate_call(node.node)
        if call.nodetype == NodeTypes.GENERIC_TOKEN:
            val = ast.literal_eval(node.node.value)
            # Bug fix: the message previously read ``node.value``, but Forced
            # nodes only have ``.node`` — a failing assert would have raised
            # AttributeError instead of showing the intended message.
            assert val in self.exact_tokens, f"{node.node.value} is not a known literal"
            tok_type = self.exact_tokens[val]
            return FunctionCall(
                assigned_variable="_literal",
                function="_PyPegen_expect_forced_token",
                arguments=["p", tok_type, f'"{val}"'],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
                comment=f"forced_token='{val}'",
            )
        else:
            raise NotImplementedError(
                f"Forced tokens don't work with {call.nodetype} tokens")

    def visit_Opt(self, node: Opt) -> FunctionCall:
        """Emit an optional item: always succeeds unless a hard error occurred."""
        call = self.generate_call(node.node)
        return FunctionCall(
            assigned_variable="_opt_var",
            function=call.function,
            arguments=call.arguments,
            force_true=True,
            comment=f"{node}",
        )

    def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
        """Emit a call to a generated _loop0 rule (zero or more)."""
        if node in self.cache:
            return self.cache[node]
        name = self.gen.name_loop(node.node, False)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
        """Emit a call to a generated _loop1 rule (one or more)."""
        if node in self.cache:
            return self.cache[node]
        name = self.gen.name_loop(node.node, True)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Gather(self, node: Gather) -> FunctionCall:
        """Emit a call to a generated _gather rule (separated repetition)."""
        if node in self.cache:
            return self.cache[node]
        name = self.gen.name_gather(node)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Group(self, node: Group) -> FunctionCall:
        # A parenthesized group is transparent: just emit its contents.
        return self.generate_call(node.rhs)

    def visit_Cut(self, node: Cut) -> FunctionCall:
        # The cut operator "parses" as the constant 1 and sets _cut_var.
        return FunctionCall(
            assigned_variable="_cut_var",
            return_type="int",
            function="1",
            nodetype=NodeTypes.CUT_OPERATOR,
        )

    def generate_call(self, node: Any) -> FunctionCall:
        """Dispatch to the visit_* method for *node*."""
        return super().visit(node)


class CParserGenerator(ParserGenerator, GrammarVisitor):
    """Generates the C source for a complete pegen parser."""

    def __init__(
        self,
        grammar: grammar.Grammar,
        tokens: Dict[int, str],
        exact_tokens: Dict[str, int],
        non_exact_tokens: Set[str],
        file: Optional[IO[Text]],
        debug: bool = False,
        skip_actions: bool = False,
    ):
        super().__init__(grammar, tokens, file)
        self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(
            self, exact_tokens, non_exact_tokens
        )
        self._varname_counter = 0
        self.debug = debug
        self.skip_actions = skip_actions
        # Statements to emit before every return (see visit_Rule's
        # handling of *_without_invalid rules).
        self.cleanup_statements: List[str] = []

    def add_level(self) -> None:
        """Emit the stack-depth guard at the top of a rule function."""
        self.print("if (p->level++ == MAXSTACK) {")
        with self.indent():
            self.print("p->error_indicator = 1;")
            self.print("PyErr_NoMemory();")
        self.print("}")

    def remove_level(self) -> None:
        self.print("p->level--;")

    def add_return(self, ret_val: str) -> None:
        """Emit cleanup statements, the level decrement, and a return."""
        for stmt in self.cleanup_statements:
            self.print(stmt)
        self.remove_level()
        self.print(f"return {ret_val};")

    def unique_varname(self, name: str = "tmpvar") -> str:
        """Return a fresh C variable name with a monotonically increasing suffix."""
        new_var = name + "_" + str(self._varname_counter)
        self._varname_counter += 1
        return new_var

    def call_with_errorcheck_return(self, call_text: str, returnval: str) -> None:
        """Emit *call_text* and return *returnval* if it reported an error."""
        error_var = self.unique_varname()
        self.print(f"int {error_var} = {call_text};")
        self.print(f"if ({error_var}) {{")
        with self.indent():
            self.add_return(returnval)
        self.print("}")

    def call_with_errorcheck_goto(self, call_text: str, goto_target: str) -> None:
        """Emit *call_text* and jump to *goto_target* if it reported an error."""
        error_var = self.unique_varname()
        self.print(f"int {error_var} = {call_text};")
        self.print(f"if ({error_var}) {{")
        with self.indent():
            self.print(f"goto {goto_target};")
        self.print("}")

    def out_of_memory_return(self, expr: str, cleanup_code: Optional[str] = None) -> None:
        """Emit an out-of-memory check that returns NULL when *expr* is true."""
        self.print(f"if ({expr}) {{")
        with self.indent():
            if cleanup_code is not None:
                self.print(cleanup_code)
            self.print("p->error_indicator = 1;")
            self.print("PyErr_NoMemory();")
            self.add_return("NULL")
        self.print("}")

    def out_of_memory_goto(self, expr: str, goto_target: str) -> None:
        """Emit an out-of-memory check that jumps to *goto_target*."""
        self.print(f"if ({expr}) {{")
        with self.indent():
            self.print("PyErr_NoMemory();")
            self.print(f"goto {goto_target};")
        self.print("}")

    def generate(self, filename: str) -> None:
        """Emit the whole C file: header, tables, prototypes, rules, trailer."""
        self.collect_todo()
        basename = os.path.basename(filename)
        self.print(f"// @generated by pegen.py from {basename}")
        header = self.grammar.metas.get("header", EXTENSION_PREFIX)
        if header:
            self.print(header.rstrip("\n"))
        subheader = self.grammar.metas.get("subheader", "")
        if subheader:
            self.print(subheader)
        self._setup_keywords()
        self._setup_soft_keywords()
        # Rule memo types start at 1000 to stay clear of token types.
        for i, (rulename, rule) in enumerate(self.todo.items(), 1000):
            comment = "  // Left-recursive" if rule.left_recursive else ""
            self.print(f"#define {rulename}_type {i}{comment}")
        self.print()
        # Forward declarations for all rule functions.
        for rulename, rule in self.todo.items():
            if rule.is_loop() or rule.is_gather():
                rule_type = "asdl_seq *"
            elif rule.type:
                rule_type = rule.type + " "
            else:
                rule_type = "void *"
            self.print(f"static {rule_type}{rulename}_rule(Parser *p);")
        self.print()
        # Visiting a rule may add new synthesized rules to self.todo.
        while self.todo:
            for rulename, rule in list(self.todo.items()):
                del self.todo[rulename]
                self.print()
                if rule.left_recursive:
                    self.print("// Left-recursive")
                self.visit(rule)
        if self.skip_actions:
            mode = 0
        else:
            mode = int(self.rules["start"].type == "mod_ty") if "start" in self.rules else 1
            if mode == 1 and self.grammar.metas.get("bytecode"):
                mode += 1
        modulename = self.grammar.metas.get("modulename", "parse")
        trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
        if trailer:
            self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))

    def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
        """Group cached hard keywords by string length for the lookup table."""
        groups: Dict[int, List[Tuple[str, int]]] = {}
        for keyword_str, keyword_type in self.callmakervisitor.keyword_cache.items():
            length = len(keyword_str)
            if length in groups:
                groups[length].append((keyword_str, keyword_type))
            else:
                groups[length] = [(keyword_str, keyword_type)]
        return groups

    def _setup_keywords(self) -> None:
        """Emit the reserved_keywords table, indexed by keyword length."""
        keyword_cache = self.callmakervisitor.keyword_cache
        n_keyword_lists = (
            len(max(keyword_cache.keys(), key=len)) + 1 if len(keyword_cache) > 0 else 0
        )
        self.print(f"static const int n_keyword_lists = {n_keyword_lists};")
        groups = self._group_keywords_by_length()
        self.print("static KeywordToken *reserved_keywords[] = {")
        with self.indent():
            num_groups = max(groups) + 1 if groups else 1
            for keywords_length in range(num_groups):
                if keywords_length not in groups.keys():
                    # Empty slot for lengths with no keywords.
                    self.print("(KeywordToken[]) {{NULL, -1}},")
                else:
                    self.print("(KeywordToken[]) {")
                    with self.indent():
                        for keyword_str, keyword_type in groups[keywords_length]:
                            self.print(f'{{"{keyword_str}", {keyword_type}}},')
                        self.print("{NULL, -1},")
                    self.print("},")
        self.print("};")

    def _setup_soft_keywords(self) -> None:
        """Emit the NULL-terminated soft_keywords string table."""
        soft_keywords = sorted(self.callmakervisitor.soft_keywords)
        self.print("static char *soft_keywords[] = {")
        with self.indent():
            for keyword in soft_keywords:
                self.print(f'"{keyword}",')
            self.print("NULL,")
        self.print("};")

    def _set_up_token_start_metadata_extraction(self) -> None:
        """Emit code capturing the start line/column for the EXTRA macro."""
        self.print("if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {")
        with self.indent():
            self.print("p->error_indicator = 1;")
            self.add_return("NULL")
        self.print("}")
        self.print("int _start_lineno = p->tokens[_mark]->lineno;")
        self.print("UNUSED(_start_lineno); // Only used by EXTRA macro")
        self.print("int _start_col_offset = p->tokens[_mark]->col_offset;")
        self.print("UNUSED(_start_col_offset); // Only used by EXTRA macro")

    def _set_up_token_end_metadata_extraction(self) -> None:
        """Emit code capturing the end line/column for the EXTRA macro."""
        self.print("Token *_token = _PyPegen_get_last_nonnwhitespace_token(p);")
        self.print("if (_token == NULL) {")
        with self.indent():
            self.add_return("NULL")
        self.print("}")
        self.print("int _end_lineno = _token->end_lineno;")
        self.print("UNUSED(_end_lineno); // Only used by EXTRA macro")
        self.print("int _end_col_offset = _token->end_col_offset;")
        self.print("UNUSED(_end_col_offset); // Only used by EXTRA macro")

    def _check_for_errors(self) -> None:
        """Emit an early NULL return when the error indicator is already set."""
        self.print("if (p->error_indicator) {")
        with self.indent():
            self.add_return("NULL")
        self.print("}")

    def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
        """Emit the iterative memo-growing wrapper for a left-recursive leader rule.

        The wrapper repeatedly calls ``<rule>_raw`` while the parse keeps
        advancing, updating the memo each round, and leaves the prototype
        for the raw function open for the caller to complete.
        """
        self.print("{")
        with self.indent():
            self.add_level()
            self.print(f"{result_type} _res = NULL;")
            self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
            with self.indent():
                self.add_return("_res")
            self.print("}")
            self.print("int _mark = p->mark;")
            self.print("int _resmark = p->mark;")
            self.print("while (1) {")
            with self.indent():
                self.call_with_errorcheck_return(
                    f"_PyPegen_update_memo(p, _mark, {node.name}_type, _res)", "_res"
                )
                self.print("p->mark = _mark;")
                self.print(f"void *_raw = {node.name}_raw(p);")
                self.print("if (p->error_indicator) {")
                with self.indent():
                    self.add_return("NULL")
                self.print("}")
                # Stop when the raw parse fails or no longer advances.
                self.print("if (_raw == NULL || p->mark <= _resmark)")
                with self.indent():
                    self.print("break;")
                self.print("_resmark = p->mark;")
                self.print("_res = _raw;")
            self.print("}")
            self.print("p->mark = _resmark;")
            self.add_return("_res")
        self.print("}")
        self.print(f"static {result_type}")
        self.print(f"{node.name}_raw(Parser *p)")

    def _should_memoize(self, node: Rule) -> bool:
        return node.memo and not node.left_recursive

    def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None:
        """Emit the body of an ordinary (non-loop) rule function."""
        memoize = self._should_memoize(node)

        with self.indent():
            self.add_level()
            self._check_for_errors()
            self.print(f"{result_type} _res = NULL;")
            if memoize:
                self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
                with self.indent():
                    self.add_return("_res")
                self.print("}")
            self.print("int _mark = p->mark;")
            if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
                self._set_up_token_start_metadata_extraction()
            self.visit(
                rhs, is_loop=False, is_gather=node.is_gather(), rulename=node.name,
            )
            if self.debug:
                self.print(f'D(fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark));')
            self.print("_res = NULL;")
        self.print("  done:")
        with self.indent():
            if memoize:
                self.print(f"_PyPegen_insert_memo(p, _mark, {node.name}_type, _res);")
            self.add_return("_res")

    def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
        """Emit the body of a _loop0/_loop1/_gather rule: collect children into an asdl_seq."""
        memoize = self._should_memoize(node)
        is_repeat1 = node.name.startswith("_loop1")

        with self.indent():
            self.add_level()
            self._check_for_errors()
            self.print("void *_res = NULL;")
            if memoize:
                self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
                with self.indent():
                    self.add_return("_res")
                self.print("}")
            self.print("int _mark = p->mark;")
            self.print("int _start_mark = p->mark;")
            self.print("void **_children = PyMem_Malloc(sizeof(void *));")
            self.out_of_memory_return("!_children")
            self.print("Py_ssize_t _children_capacity = 1;")
            self.print("Py_ssize_t _n = 0;")
            if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
                self._set_up_token_start_metadata_extraction()
            self.visit(
                rhs, is_loop=True, is_gather=node.is_gather(), rulename=node.name,
            )
            if is_repeat1:
                # A _loop1 rule must match at least one item.
                self.print("if (_n == 0 || p->error_indicator) {")
                with self.indent():
                    self.print("PyMem_Free(_children);")
                    self.add_return("NULL")
                self.print("}")
            self.print("asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena);")
            self.out_of_memory_return("!_seq", cleanup_code="PyMem_Free(_children);")
            self.print("for (int i = 0; i < _n; i++) asdl_seq_SET_UNTYPED(_seq, i, _children[i]);")
            self.print("PyMem_Free(_children);")
            if node.name:
                self.print(f"_PyPegen_insert_memo(p, _start_mark, {node.name}_type, _seq);")
            self.add_return("_seq")

    def visit_Rule(self, node: Rule) -> None:
        """Emit the complete C function for one grammar rule."""
        is_loop = node.is_loop()
        is_gather = node.is_gather()
        rhs = node.flatten()
        if is_loop or is_gather:
            result_type = "asdl_seq *"
        elif node.type:
            result_type = node.type
        else:
            result_type = "void *"

        for line in str(node).splitlines():
            self.print(f"// {line}")
        if node.left_recursive and node.leader:
            self.print(f"static {result_type} {node.name}_raw(Parser *);")

        self.print(f"static {result_type}")
        self.print(f"{node.name}_rule(Parser *p)")

        if node.left_recursive and node.leader:
            self._set_up_rule_memoization(node, result_type)

        self.print("{")

        if node.name.endswith("without_invalid"):
            # Temporarily disable invalid-rule dispatch; restored on return.
            with self.indent():
                self.print("int _prev_call_invalid = p->call_invalid_rules;")
                self.print("p->call_invalid_rules = 0;")
                self.cleanup_statements.append("p->call_invalid_rules = _prev_call_invalid;")

        if is_loop:
            self._handle_loop_rule_body(node, rhs)
        else:
            self._handle_default_rule_body(node, rhs, result_type)

        if node.name.endswith("without_invalid"):
            self.cleanup_statements.pop()

        self.print("}")

    def visit_NamedItem(self, node: NamedItem) -> None:
        """Emit one item's parser call, deduplicating its target variable."""
        call = self.callmakervisitor.generate_call(node)
        if call.assigned_variable:
            call.assigned_variable = self.dedupe(call.assigned_variable)
        self.print(call)

    def visit_Rhs(
        self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
    ) -> None:
        """Emit all alternatives of a rule; loop rules have exactly one."""
        if is_loop:
            assert len(node.alts) == 1
        for alt in node.alts:
            self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename)

    def join_conditions(self, keyword: str, node: Any) -> None:
        """Emit ``keyword ( item && item && ... )`` for the alternative's items."""
        self.print(f"{keyword} (")
        with self.indent():
            first = True
            for item in node.items:
                if first:
                    first = False
                else:
                    self.print("&&")
                self.visit(item)
        self.print(")")

    def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
        """Emit the alternative's user action and its NULL/exception check."""
        self.print(f"_res = {node.action};")

        self.print("if (_res == NULL && PyErr_Occurred()) {")
        with self.indent():
            self.print("p->error_indicator = 1;")
            if cleanup_code:
                self.print(cleanup_code)
            self.add_return("NULL")
        self.print("}")

        if self.debug:
            self.print(
                f'D(fprintf(stderr, "Hit with action [%d-%d]: %s\\n", _mark, p->mark, "{node}"));'
            )

    def emit_default_action(self, is_gather: bool, node: Alt) -> None:
        """Emit the implicit action when the alternative declares none."""
        if len(self.local_variable_names) > 1:
            if is_gather:
                assert len(self.local_variable_names) == 2
                self.print(
                    f"_res = _PyPegen_seq_insert_in_front(p, "
                    f"{self.local_variable_names[0]}, {self.local_variable_names[1]});"
                )
            else:
                if self.debug:
                    self.print(
                        f'D(fprintf(stderr, "Hit without action [%d:%d]: %s\\n", _mark, p->mark, "{node}"));'
                    )
                self.print(
                    f"_res = _PyPegen_dummy_name(p, {', '.join(self.local_variable_names)});"
                )
        else:
            if self.debug:
                self.print(
                    f'D(fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", _mark, p->mark, "{node}"));'
                )
            self.print(f"_res = {self.local_variable_names[0]};")

    def emit_dummy_action(self) -> None:
        """Emit the placeholder action used when actions are skipped."""
        self.print("_res = _PyPegen_dummy_name(p);")

    def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
        """Emit the success branch of an ordinary alternative."""
        self.join_conditions(keyword="if", node=node)
        self.print("{")
        # We have parsed successfully all the conditions for the option.
        with self.indent():
            node_str = str(node).replace('"', '\\"')
            self.print(
                f'D(fprintf(stderr, "%*c+ {rulename}[%d-%d]: %s succeeded!\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
            )
            # Prepare to emit the rule action and do so
            if node.action and "EXTRA" in node.action:
                self._set_up_token_end_metadata_extraction()
            if self.skip_actions:
                self.emit_dummy_action()
            elif node.action:
                self.emit_action(node)
            else:
                self.emit_default_action(is_gather, node)

            # As the current option has parsed correctly, do not continue with the rest.
            self.print("goto done;")
        self.print("}")

    def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
        """Emit the repeating body of a loop alternative."""
        # Condition of the main body of the alternative
        self.join_conditions(keyword="while", node=node)
        self.print("{")
        # We have parsed successfully one item!
        with self.indent():
            # Prepare to emit the rule action and do so
            if node.action and "EXTRA" in node.action:
                self._set_up_token_end_metadata_extraction()
            if self.skip_actions:
                self.emit_dummy_action()
            elif node.action:
                self.emit_action(node, cleanup_code="PyMem_Free(_children);")
            else:
                self.emit_default_action(is_gather, node)

            # Add the result of rule to the temporary buffer of children. This buffer
            # will populate later an asdl_seq with all elements to return.
            self.print("if (_n == _children_capacity) {")
            with self.indent():
                self.print("_children_capacity *= 2;")
                self.print(
                    "void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *));"
                )
                self.out_of_memory_return("!_new_children")
                self.print("_children = _new_children;")
            self.print("}")
            self.print("_children[_n++] = _res;")
            self.print("_mark = p->mark;")
        self.print("}")

    def visit_Alt(
        self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
    ) -> None:
        """Emit one alternative: declarations, body, and backtracking epilogue."""
        if len(node.items) == 1 and str(node.items[0]).startswith('invalid_'):
            # invalid_* alternatives only run during the error-reporting pass.
            self.print(f"if (p->call_invalid_rules) {{ // {node}")
        else:
            self.print(f"{{ // {node}")
        with self.indent():
            self._check_for_errors()
            node_str = str(node).replace('"', '\\"')
            self.print(
                f'D(fprintf(stderr, "%*c> {rulename}[%d-%d]: %s\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
            )
            # Prepare variable declarations for the alternative
            var_types = self.collect_vars(node)
            for v, var_type in sorted(item for item in var_types.items() if item[0] is not None):
                if not var_type:
                    var_type = "void *"
                else:
                    var_type += " "
                if v == "_cut_var":
                    v += " = 0"  # cut_var must be initialized
                self.print(f"{var_type}{v};")
                if v.startswith("_opt_var"):
                    self.print(f"UNUSED({v}); // Silence compiler warnings")

            with self.local_variable_context():
                if is_loop:
                    self.handle_alt_loop(node, is_gather, rulename)
                else:
                    self.handle_alt_normal(node, is_gather, rulename)

            # Alternative failed: backtrack to the saved mark.
            self.print("p->mark = _mark;")
            node_str = str(node).replace('"', '\\"')
            self.print(
                f"D(fprintf(stderr, \"%*c%s {rulename}[%d-%d]: %s failed!\\n\", p->level, ' ',\n"
                f'                  p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "{node_str}"));'
            )
            if "_cut_var" in var_types:
                # A triggered cut aborts the whole rule, not just this alternative.
                self.print("if (_cut_var) {")
                with self.indent():
                    self.add_return("NULL")
                self.print("}")
        self.print("}")

    def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
        """Return {variable name: C type} for every item in the alternative."""
        types = {}
        with self.local_variable_context():
            for item in node.items:
                name, var_type = self.add_var(item)
                types[name] = var_type
        return types

    def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
        """Return the (deduplicated name, type) the item's result is stored in."""
        call = self.callmakervisitor.generate_call(node.item)
        name = node.name if node.name else call.assigned_variable
        if name is not None:
            name = self.dedupe(name)
        return_type = call.return_type if node.type is None else node.type
        return name, return_type