import ast
from dataclasses import field, dataclass
import os.path
import re
from typing import Any, Dict, IO, Optional, List, Text, Tuple, Set
from enum import Enum

from pegen import grammar
from pegen.grammar import (
    Alt,
    Cut,
    Forced,
    Gather,
    GrammarVisitor,
    Group,
    Lookahead,
    NamedItem,
    NameLeaf,
    NegativeLookahead,
    Opt,
    PositiveLookahead,
    Repeat0,
    Repeat1,
    Rhs,
    Rule,
    StringLeaf,
)
from pegen.parser_generator import ParserGenerator


# Default text emitted at the top of the generated C file; a grammar can
# replace it through its "header" meta (see CParserGenerator.generate).
EXTENSION_PREFIX = """\
#include "pegen.h"

#if defined(Py_DEBUG) && defined(Py_BUILD_CORE)
# define D(x) if (Py_DebugFlag) x;
#else
# define D(x)
#endif

# define MAXSTACK 6000

"""


# Default text emitted at the bottom of the generated C file; a grammar can
# replace it through its "trailer" meta (see CParserGenerator.generate).
EXTENSION_SUFFIX = """
void *
_PyPegen_parse(Parser *p)
{
    // Initialize keywords
    p->keywords = reserved_keywords;
    p->n_keyword_lists = n_keyword_lists;
    p->soft_keywords = soft_keywords;

    return start_rule(p);
}
"""


class NodeTypes(Enum):
    """Kinds of grammar leaves the call maker distinguishes between."""

    NAME_TOKEN = 0
    NUMBER_TOKEN = 1
    STRING_TOKEN = 2
    GENERIC_TOKEN = 3
    KEYWORD = 4
    SOFT_KEYWORD = 5
    CUT_OPERATOR = 6


# Non-exact token names that are parsed by a dedicated
# ``_PyPegen_<name>_token`` helper (see CCallMakerVisitor.visit_NameLeaf).
BASE_NODETYPES = {
    "NAME": NodeTypes.NAME_TOKEN,
    "NUMBER": NodeTypes.NUMBER_TOKEN,
    "STRING": NodeTypes.STRING_TOKEN,
    "SOFT_KEYWORD": NodeTypes.SOFT_KEYWORD,
}


@dataclass
class FunctionCall:
    """Description of one C call expression that parses a grammar item.

    ``str()`` of an instance yields the C source text for the call,
    optionally wrapped in an assignment to ``assigned_variable``.
    """

    function: str
    arguments: List[Any] = field(default_factory=list)
    assigned_variable: Optional[str] = None
    assigned_variable_type: Optional[str] = None
    return_type: Optional[str] = None
    nodetype: Optional[NodeTypes] = None
    force_true: bool = False
    comment: Optional[str] = None

    def __str__(self) -> str:
        """Return the C expression text for this call."""
        parts = [self.function]
        # A call without arguments is rendered as a bare expression (this is
        # how the cut operator, whose "function" is the literal "1", is built).
        if self.arguments:
            parts.append(f"({', '.join(map(str, self.arguments))})")
        if self.force_true:
            # Comma operator: the expression's value becomes
            # ``!p->error_indicator`` regardless of the call's own result.
            parts.append(", !p->error_indicator")
        if self.assigned_variable:
            if self.assigned_variable_type:
                parts = [
                    "(",
                    self.assigned_variable,
                    " = ",
                    "(",
                    self.assigned_variable_type,
                    ")",
                    *parts,
                    ")",
                ]
            else:
                parts = ["(", self.assigned_variable, " = ", *parts, ")"]
        if self.comment:
            parts.append(f" // {self.comment}")
        return "".join(parts)


class CCallMakerVisitor(GrammarVisitor):
    """Grammar visitor that maps each grammar node to a FunctionCall."""

    def __init__(
        self,
        parser_generator: ParserGenerator,
        exact_tokens: Dict[str, int],
        non_exact_tokens: Set[str],
    ):
        self.gen = parser_generator
        self.exact_tokens = exact_tokens
        self.non_exact_tokens = non_exact_tokens
        # Calls already built for Rhs / Repeat0 / Repeat1 / Gather nodes.
        self.cache: Dict[Any, FunctionCall] = {}
        # Keyword text -> token type obtained from the parser generator.
        self.keyword_cache: Dict[str, int] = {}
        self.soft_keywords: Set[str] = set()

    def keyword_helper(self, keyword: str) -> FunctionCall:
        """Return the call expecting the hard keyword *keyword*.

        The first time a keyword is seen, a token type is requested from the
        parser generator and remembered in ``keyword_cache``.
        """
        if keyword not in self.keyword_cache:
            self.keyword_cache[keyword] = self.gen.keyword_type()
        return FunctionCall(
            assigned_variable="_keyword",
            function="_PyPegen_expect_token",
            arguments=["p", self.keyword_cache[keyword]],
            return_type="Token *",
            nodetype=NodeTypes.KEYWORD,
            comment=f"token='{keyword}'",
        )

    def soft_keyword_helper(self, value: str) -> FunctionCall:
        """Return the call expecting the soft keyword *value*.

        *value* is the quoted grammar literal; it is passed to C unchanged,
        while the unquoted text is recorded in ``soft_keywords``.
        """
        self.soft_keywords.add(value.replace('"', ""))
        return FunctionCall(
            assigned_variable="_keyword",
            function="_PyPegen_expect_soft_keyword",
            arguments=["p", value],
            return_type="expr_ty",
            nodetype=NodeTypes.SOFT_KEYWORD,
            comment=f"soft_keyword='{value}'",
        )

    def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
        """Return the call for a named token or a reference to another rule."""
        name = node.value
        if name in self.non_exact_tokens:
            if name in BASE_NODETYPES:
                return FunctionCall(
                    assigned_variable=f"{name.lower()}_var",
                    function=f"_PyPegen_{name.lower()}_token",
                    arguments=["p"],
                    nodetype=BASE_NODETYPES[name],
                    return_type="expr_ty",
                    comment=name,
                )
            return FunctionCall(
                assigned_variable=f"{name.lower()}_var",
                function="_PyPegen_expect_token",
                arguments=["p", name],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
                comment=f"token='{name}'",
            )

        # Not a token: treat the name as a rule reference.
        result_type = None
        rule = self.gen.all_rules.get(name.lower())
        if rule is not None:
            if rule.is_loop() or rule.is_gather():
                result_type = "asdl_seq *"
            else:
                result_type = rule.type

        return FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type=result_type,
            comment=f"{node}",
        )

    def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
        """Return the call for a quoted literal (keyword or exact token)."""
        val = ast.literal_eval(node.value)
        if re.match(r"[a-zA-Z_]\w*\Z", val):  # This is a keyword
            # Single-quoted literals are hard keywords; anything else is
            # handled as a soft keyword.
            if node.value.endswith("'"):
                return self.keyword_helper(val)
            return self.soft_keyword_helper(node.value)

        assert val in self.exact_tokens, f"{node.value} is not a known literal"
        token_type = self.exact_tokens[val]
        return FunctionCall(
            assigned_variable="_literal",
            function="_PyPegen_expect_token",
            arguments=["p", token_type],
            nodetype=NodeTypes.GENERIC_TOKEN,
            return_type="Token *",
            comment=f"token='{val}'",
        )

    def visit_Rhs(self, node: Rhs) -> FunctionCall:
        """Return the (cached) call for a right-hand side.

        A right-hand side made of a single alternative holding a single item
        and no action is replaced by the call for that item; otherwise a
        helper rule name is requested from the parser generator.
        """

        def can_we_inline(node: Rhs) -> bool:
            if len(node.alts) != 1 or len(node.alts[0].items) != 1:
                return False
            # If the alternative has an action we cannot inline
            if getattr(node.alts[0], "action", None) is not None:
                return False
            return True

        if node in self.cache:
            return self.cache[node]
        if can_we_inline(node):
            self.cache[node] = self.generate_call(node.alts[0].items[0])
        else:
            name = self.gen.name_node(node)
            self.cache[node] = FunctionCall(
                assigned_variable=f"{name}_var",
                function=f"{name}_rule",
                arguments=["p"],
                comment=f"{node}",
            )
        return self.cache[node]

    def visit_NamedItem(self, node: NamedItem) -> FunctionCall:
        """Return the call for the wrapped item, applying the item's name/type.

        Note that the FunctionCall returned by ``generate_call`` is modified
        in place.
        """
        call = self.generate_call(node.item)
        if node.name:
            call.assigned_variable = node.name
        if node.type:
            call.assigned_variable_type = node.type
        return call

    def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall:
        """Return the lookahead call wrapping the call for ``node.node``.

        *positive* is passed through as the first C argument (1 for a
        positive lookahead, 0 for a negative one). The C helper is chosen
        from the node type of the wrapped call.
        """
        call = self.generate_call(node.node)
        comment = None
        if call.nodetype == NodeTypes.NAME_TOKEN:
            helper = "_PyPegen_lookahead_with_name"
        elif call.nodetype == NodeTypes.SOFT_KEYWORD:
            helper = "_PyPegen_lookahead_with_string"
        elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
            helper = "_PyPegen_lookahead_with_int"
            comment = f"token={node.node}"
        else:
            helper = "_PyPegen_lookahead"
        return FunctionCall(
            function=helper,
            arguments=[positive, call.function, *call.arguments],
            return_type="int",
            comment=comment,
        )

    def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall:
        """Return the call for a positive lookahead."""
        return self.lookahead_call_helper(node, 1)

    def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall:
        """Return the call for a negative lookahead."""
        return self.lookahead_call_helper(node, 0)

    def visit_Forced(self, node: Forced) -> FunctionCall:
        """Return the call for a forced token.

        Raises:
            NotImplementedError: if the forced item is not a generic token.
        """
        call = self.generate_call(node.node)
        if call.nodetype != NodeTypes.GENERIC_TOKEN:
            raise NotImplementedError(
                f"Forced tokens don't work with {call.nodetype} tokens")

        val = ast.literal_eval(node.node.value)
        # The literal lives on the wrapped node (node.node), not on the
        # Forced node itself, so the message must read it from there.
        assert val in self.exact_tokens, f"{node.node.value} is not a known literal"
        token_type = self.exact_tokens[val]
        return FunctionCall(
            assigned_variable="_literal",
            function="_PyPegen_expect_forced_token",
            arguments=["p", token_type, f'"{val}"'],
            nodetype=NodeTypes.GENERIC_TOKEN,
            return_type="Token *",
            comment=f"forced_token='{val}'",
        )

    def visit_Opt(self, node: Opt) -> FunctionCall:
        """Return the call for an optional item (always evaluates as success
        unless the parser's error indicator is set; see FunctionCall.__str__).
        """
        call = self.generate_call(node.node)
        return FunctionCall(
            assigned_variable="_opt_var",
            function=call.function,
            arguments=call.arguments,
            force_true=True,
            comment=f"{node}",
        )

    def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
        """Return the (cached) call to the helper loop rule for ``node``."""
        if node in self.cache:
            return self.cache[node]
        name = self.gen.name_loop(node.node, False)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
        """Return the (cached) call to the helper loop rule for ``node``."""
        if node in self.cache:
            return self.cache[node]
        name = self.gen.name_loop(node.node, True)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Gather(self, node: Gather) -> FunctionCall:
        """Return the (cached) call to the helper gather rule for ``node``."""
        if node in self.cache:
            return self.cache[node]
        name = self.gen.name_gather(node)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Group(self, node: Group) -> FunctionCall:
        """Return the call for the group's right-hand side."""
        return self.generate_call(node.rhs)

    def visit_Cut(self, node: Cut) -> FunctionCall:
        """Return the pseudo-call for the cut operator (``_cut_var = 1``)."""
        return FunctionCall(
            assigned_variable="_cut_var",
            return_type="int",
            function="1",
            nodetype=NodeTypes.CUT_OPERATOR,
        )

    def generate_call(self, node: Any) -> FunctionCall:
        """Dispatch *node* to the matching ``visit_*`` method."""
        return super().visit(node)


class CParserGenerator(ParserGenerator, GrammarVisitor):
    """Parser generator that prints a C parser for the given grammar."""

    def __init__(
        self,
        grammar: grammar.Grammar,
        tokens: Dict[int, str],
        exact_tokens: Dict[str, int],
        non_exact_tokens: Set[str],
        file: Optional[IO[Text]],
        debug: bool = False,
        skip_actions: bool = False,
    ):
        super().__init__(grammar, tokens, file)
        self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(
            self, exact_tokens, non_exact_tokens
        )
        self._varname_counter = 0
        self.debug = debug
        self.skip_actions = skip_actions

    def add_level(self) -> None:
        """Emit the recursion-depth increment and the MAXSTACK check."""
        self.print("if (p->level++ == MAXSTACK) {")
        with self.indent():
            self.print("p->error_indicator = 1;")
            self.print("PyErr_NoMemory();")
        self.print("}")

    def remove_level(self) -> None:
        """Emit the recursion-depth decrement."""
        self.print("p->level--;")

    def add_return(self, ret_val: str) -> None:
        """Emit ``return ret_val;`` preceded by the depth decrement."""
        self.remove_level()
        self.print(f"return {ret_val};")

    def unique_varname(self, name: str = "tmpvar") -> str:
        """Return ``<name>_<counter>`` and advance the counter."""
        new_var = name + "_" + str(self._varname_counter)
        self._varname_counter += 1
        return new_var

    def call_with_errorcheck_return(self, call_text: str, returnval: str) -> None:
        """Emit *call_text* and return *returnval* if its result is non-zero."""
        error_var = self.unique_varname()
        self.print(f"int {error_var} = {call_text};")
        self.print(f"if ({error_var}) {{")
        with self.indent():
            self.add_return(returnval)
        self.print("}")

    def call_with_errorcheck_goto(self, call_text: str, goto_target: str) -> None:
        """Emit *call_text* and jump to *goto_target* if its result is non-zero."""
        error_var = self.unique_varname()
        self.print(f"int {error_var} = {call_text};")
        self.print(f"if ({error_var}) {{")
        with self.indent():
            self.print(f"goto {goto_target};")
        self.print("}")

    def out_of_memory_return(self, expr: str, cleanup_code: Optional[str] = None,) -> None:
        """Emit an out-of-memory bail-out guarded by the C condition *expr*.

        *cleanup_code*, when given, is emitted first inside the guarded block.
        """
        self.print(f"if ({expr}) {{")
        with self.indent():
            if cleanup_code is not None:
                self.print(cleanup_code)
            self.print("p->error_indicator = 1;")
            self.print("PyErr_NoMemory();")
            self.add_return("NULL")
        self.print("}")

    def out_of_memory_goto(self, expr: str, goto_target: str) -> None:
        """Emit an out-of-memory jump to *goto_target* guarded by *expr*."""
        self.print(f"if ({expr}) {{")
        with self.indent():
            self.print("PyErr_NoMemory();")
            self.print(f"goto {goto_target};")
        self.print("}")

    def generate(self, filename: str) -> None:
        """Print the whole C parser.

        Output order: header and subheader, keyword tables, one ``#define``
        per rule, forward declarations, rule bodies, and finally the trailer.
        *filename* is only used (as a basename) in the leading comment.
        """
        self.collect_todo()
        basename = os.path.basename(filename)
        self.print(f"// @generated by pegen.py from {basename}")
        header = self.grammar.metas.get("header", EXTENSION_PREFIX)
        if header:
            self.print(header.rstrip("\n"))
        subheader = self.grammar.metas.get("subheader", "")
        if subheader:
            self.print(subheader)
        self._setup_keywords()
        self._setup_soft_keywords()
        # Rule type ids are numbered from 1000 upwards.
        for i, (rulename, rule) in enumerate(self.todo.items(), 1000):
            comment = " // Left-recursive" if rule.left_recursive else ""
            self.print(f"#define {rulename}_type {i}{comment}")
        self.print()
        for rulename, rule in self.todo.items():
            if rule.is_loop() or rule.is_gather():
                rule_type = "asdl_seq *"
            elif rule.type:
                rule_type = rule.type + " "
            else:
                rule_type = "void *"
            self.print(f"static {rule_type}{rulename}_rule(Parser *p);")
        self.print()
        # The outer loop repeats until self.todo is empty; each pass works
        # on a snapshot because entries are deleted while iterating.
        while self.todo:
            for rulename, rule in list(self.todo.items()):
                del self.todo[rulename]
                self.print()
                if rule.left_recursive:
                    self.print("// Left-recursive")
                self.visit(rule)
        if self.skip_actions:
            mode = 0
        else:
            mode = int(self.rules["start"].type == "mod_ty") if "start" in self.rules else 1
            if mode == 1 and self.grammar.metas.get("bytecode"):
                mode += 1
        modulename = self.grammar.metas.get("modulename", "parse")
        trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
        if trailer:
            # The trailer is a %-format template over ``mode``/``modulename``.
            self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))

    def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
        """Return the cached keywords grouped by the length of their text."""
        groups: Dict[int, List[Tuple[str, int]]] = {}
        for keyword_str, keyword_type in self.callmakervisitor.keyword_cache.items():
            groups.setdefault(len(keyword_str), []).append((keyword_str, keyword_type))
        return groups

    def _setup_keywords(self) -> None:
        """Print ``n_keyword_lists`` and the ``reserved_keywords`` table.

        The table is indexed by keyword length; lengths without keywords get
        a list holding only the ``{NULL, -1}`` terminator.
        """
        keyword_cache = self.callmakervisitor.keyword_cache
        n_keyword_lists = max(map(len, keyword_cache)) + 1 if keyword_cache else 0
        self.print(f"static const int n_keyword_lists = {n_keyword_lists};")
        groups = self._group_keywords_by_length()
        self.print("static KeywordToken *reserved_keywords[] = {")
        with self.indent():
            num_groups = max(groups) + 1 if groups else 1
            for keywords_length in range(num_groups):
                if keywords_length not in groups:
                    self.print("(KeywordToken[]) {{NULL, -1}},")
                else:
                    self.print("(KeywordToken[]) {")
                    with self.indent():
                        for keyword_str, keyword_type in groups[keywords_length]:
                            self.print(f'{{"{keyword_str}", {keyword_type}}},')
                        self.print("{NULL, -1},")
                    self.print("},")
        self.print("};")

    def _setup_soft_keywords(self) -> None:
        """Print the NULL-terminated, sorted ``soft_keywords`` array."""
        soft_keywords = sorted(self.callmakervisitor.soft_keywords)
        self.print("static char *soft_keywords[] = {")
        with self.indent():
            for keyword in soft_keywords:
                self.print(f'"{keyword}",')
            self.print("NULL,")
        self.print("};")

    def _set_up_token_start_metadata_extraction(self) -> None:
        """Emit the declarations of ``_start_lineno``/``_start_col_offset``."""
        self.print("if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {")
        with self.indent():
            self.print("p->error_indicator = 1;")
            self.add_return("NULL")
        self.print("}")
        self.print("int _start_lineno = p->tokens[_mark]->lineno;")
        self.print("UNUSED(_start_lineno); // Only used by EXTRA macro")
        self.print("int _start_col_offset = p->tokens[_mark]->col_offset;")
        self.print("UNUSED(_start_col_offset); // Only used by EXTRA macro")

    def _set_up_token_end_metadata_extraction(self) -> None:
        """Emit the declarations of ``_end_lineno``/``_end_col_offset``."""
        self.print("Token *_token = _PyPegen_get_last_nonnwhitespace_token(p);")
        self.print("if (_token == NULL) {")
        with self.indent():
            self.add_return("NULL")
        self.print("}")
        self.print("int _end_lineno = _token->end_lineno;")
        self.print("UNUSED(_end_lineno); // Only used by EXTRA macro")
        self.print("int _end_col_offset = _token->end_col_offset;")
        self.print("UNUSED(_end_col_offset); // Only used by EXTRA macro")

    def _check_for_errors(self) -> None:
        """Emit an early ``return NULL`` taken when the error indicator is set."""
        self.print("if (p->error_indicator) {")
        with self.indent():
            self.add_return("NULL")
        self.print("}")

    def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
        """Emit the memoizing wrapper body of a left-recursive leader rule.

        The emitted body repeatedly calls ``<name>_raw`` and stops when it
        fails or does not advance past the best mark seen so far. The
        signature of ``<name>_raw`` is printed last, so the caller's rule
        body becomes the raw function's body.
        """
        self.print("{")
        with self.indent():
            self.add_level()
            self.print(f"{result_type} _res = NULL;")
            self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
            with self.indent():
                self.add_return("_res")
            self.print("}")
            self.print("int _mark = p->mark;")
            self.print("int _resmark = p->mark;")
            self.print("while (1) {")
            with self.indent():
                self.call_with_errorcheck_return(
                    f"_PyPegen_update_memo(p, _mark, {node.name}_type, _res)", "_res"
                )
                self.print("p->mark = _mark;")
                self.print("p->in_raw_rule++;")
                self.print(f"void *_raw = {node.name}_raw(p);")
                self.print("p->in_raw_rule--;")
                self.print("if (p->error_indicator) {")
                with self.indent():
                    self.add_return("NULL")
                self.print("}")
                self.print("if (_raw == NULL || p->mark <= _resmark)")
                with self.indent():
                    self.print("break;")
                self.print("_resmark = p->mark;")
                self.print("_res = _raw;")
            self.print("}")
            self.print("p->mark = _resmark;")
            self.add_return("_res")
        self.print("}")
        self.print(f"static {result_type}")
        self.print(f"{node.name}_raw(Parser *p)")

    def _should_memoize(self, node: Rule) -> bool:
        """Return whether plain memoization code is emitted for *node*."""
        return node.memo and not node.left_recursive

    def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None:
        """Emit the body of a non-loop rule, ending at the ``done:`` label."""
        memoize = self._should_memoize(node)

        with self.indent():
            self.add_level()
            self._check_for_errors()
            self.print(f"{result_type} _res = NULL;")
            if memoize:
                self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
                with self.indent():
                    self.add_return("_res")
                self.print("}")
            self.print("int _mark = p->mark;")
            if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
                self._set_up_token_start_metadata_extraction()
            self.visit(
                rhs, is_loop=False, is_gather=node.is_gather(), rulename=node.name,
            )
            if self.debug:
                self.print(f'D(fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark));')
            # Reached only when no alternative jumped to ``done``.
            self.print("_res = NULL;")
        self.print(" done:")
        with self.indent():
            if memoize:
                self.print(f"_PyPegen_insert_memo(p, _mark, {node.name}_type, _res);")
            self.add_return("_res")

    def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
        """Emit the body of a loop rule.

        The emitted code collects results in a growable ``_children`` buffer
        and copies them into an ``asdl_seq`` at the end.
        """
        memoize = self._should_memoize(node)
        is_repeat1 = node.name.startswith("_loop1")

        with self.indent():
            self.add_level()
            self._check_for_errors()
            self.print("void *_res = NULL;")
            if memoize:
                self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
                with self.indent():
                    self.add_return("_res")
                self.print("}")
            self.print("int _mark = p->mark;")
            self.print("int _start_mark = p->mark;")
            self.print("void **_children = PyMem_Malloc(sizeof(void *));")
            self.out_of_memory_return("!_children")
            self.print("Py_ssize_t _children_capacity = 1;")
            self.print("Py_ssize_t _n = 0;")
            if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
                self._set_up_token_start_metadata_extraction()
            self.visit(
                rhs, is_loop=True, is_gather=node.is_gather(), rulename=node.name,
            )
            if is_repeat1:
                self.print("if (_n == 0 || p->error_indicator) {")
                with self.indent():
                    self.print("PyMem_Free(_children);")
                    self.add_return("NULL")
                self.print("}")
            self.print("asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena);")
            self.out_of_memory_return("!_seq", cleanup_code="PyMem_Free(_children);")
            self.print("for (int i = 0; i < _n; i++) asdl_seq_SET_UNTYPED(_seq, i, _children[i]);")
            self.print("PyMem_Free(_children);")
            # NOTE(review): this is guarded by node.name rather than by
            # ``memoize``, so the memo insert is emitted even when no memo
            # lookup was emitted above - confirm this is intended.
            if node.name:
                self.print(f"_PyPegen_insert_memo(p, _start_mark, {node.name}_type, _seq);")
            self.add_return("_seq")

    def visit_Rule(self, node: Rule) -> None:
        """Emit the C function (or wrapper plus raw function) for a rule."""
        is_loop = node.is_loop()
        is_gather = node.is_gather()
        rhs = node.flatten()
        if is_loop or is_gather:
            result_type = "asdl_seq *"
        elif node.type:
            result_type = node.type
        else:
            result_type = "void *"

        # The rule's grammar text precedes the function as a C comment.
        for line in str(node).splitlines():
            self.print(f"// {line}")
        if node.left_recursive and node.leader:
            self.print(f"static {result_type} {node.name}_raw(Parser *);")

        self.print(f"static {result_type}")
        self.print(f"{node.name}_rule(Parser *p)")

        if node.left_recursive and node.leader:
            self._set_up_rule_memoization(node, result_type)

        self.print("{")
        if is_loop:
            self._handle_loop_rule_body(node, rhs)
        else:
            self._handle_default_rule_body(node, rhs, result_type)
        self.print("}")

    def visit_NamedItem(self, node: NamedItem) -> None:
        """Print the call for one item, de-duplicating its variable name."""
        call = self.callmakervisitor.generate_call(node)
        if call.assigned_variable:
            call.assigned_variable = self.dedupe(call.assigned_variable)
        self.print(call)

    def visit_Rhs(
        self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
    ) -> None:
        """Emit every alternative of *node* (a loop must have exactly one)."""
        if is_loop:
            assert len(node.alts) == 1
        for alt in node.alts:
            self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename)

    def join_conditions(self, keyword: str, node: Any) -> None:
        """Emit ``<keyword> ( item && item ... )`` for the items of *node*."""
        self.print(f"{keyword} (")
        with self.indent():
            for index, item in enumerate(node.items):
                if index:
                    self.print("&&")
                self.visit(item)
        self.print(")")

    def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
        """Emit the alternative's action and the check for a raised error.

        *cleanup_code*, when given, is emitted on the error path before
        returning.
        """
        self.print(f"_res = {node.action};")

        self.print("if (_res == NULL && PyErr_Occurred()) {")
        with self.indent():
            self.print("p->error_indicator = 1;")
            if cleanup_code:
                self.print(cleanup_code)
            self.add_return("NULL")
        self.print("}")

        if self.debug:
            # Escape double quotes so the alternative's text stays a valid
            # C string literal.
            node_str = str(node).replace('"', '\\"')
            self.print(
                f'D(fprintf(stderr, "Hit with action [%d-%d]: %s\\n", _mark, p->mark, "{node_str}"));'
            )

    def emit_default_action(self, is_gather: bool, node: Alt) -> None:
        """Emit the result assignment for an alternative without an action."""
        # Escape double quotes so the alternative's text stays a valid
        # C string literal in the debug output below.
        node_str = str(node).replace('"', '\\"')
        if len(self.local_variable_names) > 1:
            if is_gather:
                assert len(self.local_variable_names) == 2
                self.print(
                    f"_res = _PyPegen_seq_insert_in_front(p, "
                    f"{self.local_variable_names[0]}, {self.local_variable_names[1]});"
                )
            else:
                if self.debug:
                    self.print(
                        f'D(fprintf(stderr, "Hit without action [%d:%d]: %s\\n", _mark, p->mark, "{node_str}"));'
                    )
                self.print(
                    f"_res = _PyPegen_dummy_name(p, {', '.join(self.local_variable_names)});"
                )
        else:
            if self.debug:
                self.print(
                    f'D(fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", _mark, p->mark, "{node_str}"));'
                )
            self.print(f"_res = {self.local_variable_names[0]};")

    def emit_dummy_action(self) -> None:
        """Emit the placeholder result used when actions are skipped."""
        self.print("_res = _PyPegen_dummy_name(p);")

    def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
        """Emit the ``if (...) { ... goto done; }`` block of an alternative."""
        self.join_conditions(keyword="if", node=node)
        self.print("{")
        # We have parsed successfully all the conditions for the option.
        with self.indent():
            node_str = str(node).replace('"', '\\"')
            self.print(
                f'D(fprintf(stderr, "%*c+ {rulename}[%d-%d]: %s succeeded!\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
            )
            # Prepare to emit the rule action and do so
            if node.action and "EXTRA" in node.action:
                self._set_up_token_end_metadata_extraction()
            if self.skip_actions:
                self.emit_dummy_action()
            elif node.action:
                self.emit_action(node)
            else:
                self.emit_default_action(is_gather, node)

            # As the current option has parsed correctly, do not continue with the rest.
            self.print("goto done;")
        self.print("}")

    def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
        """Emit the ``while (...) { ... }`` block of a loop alternative."""
        # Condition of the main body of the alternative
        self.join_conditions(keyword="while", node=node)
        self.print("{")
        # We have parsed successfully one item!
        with self.indent():
            # Prepare to emit the rule action and do so
            if node.action and "EXTRA" in node.action:
                self._set_up_token_end_metadata_extraction()
            if self.skip_actions:
                self.emit_dummy_action()
            elif node.action:
                self.emit_action(node, cleanup_code="PyMem_Free(_children);")
            else:
                self.emit_default_action(is_gather, node)

            # Add the result of rule to the temporary buffer of children. This buffer
            # will populate later an asdl_seq with all elements to return.
            self.print("if (_n == _children_capacity) {")
            with self.indent():
                self.print("_children_capacity *= 2;")
                self.print(
                    "void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *));"
                )
                # When the realloc fails the old buffer is still allocated,
                # so it has to be released before bailing out.
                self.out_of_memory_return(
                    "!_new_children", cleanup_code="PyMem_Free(_children);"
                )
                self.print("_children = _new_children;")
            self.print("}")
            self.print("_children[_n++] = _res;")
            self.print("_mark = p->mark;")
        self.print("}")

    def visit_Alt(
        self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
    ) -> None:
        """Emit the C block for one alternative.

        The block declares the alternative's variables, tries to match it,
        and on failure resets the mark (returning NULL if a cut was reached).
        """
        # An alternative consisting of a single ``invalid_*`` item is only
        # attempted when ``p->call_invalid_rules`` is set.
        if len(node.items) == 1 and str(node.items[0]).startswith('invalid_'):
            self.print(f"if (p->call_invalid_rules) {{ // {node}")
        else:
            self.print(f"{{ // {node}")
        with self.indent():
            self._check_for_errors()
            node_str = str(node).replace('"', '\\"')
            self.print(
                f'D(fprintf(stderr, "%*c> {rulename}[%d-%d]: %s\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
            )
            # Prepare variable declarations for the alternative
            var_types = self.collect_vars(node)
            for v, var_type in sorted(item for item in var_types.items() if item[0] is not None):
                if not var_type:
                    var_type = "void *"
                else:
                    var_type += " "
                if v == "_cut_var":
                    v += " = 0"  # cut_var must be initialized
                self.print(f"{var_type}{v};")
                if v.startswith("_opt_var"):
                    self.print(f"UNUSED({v}); // Silence compiler warnings")

            with self.local_variable_context():
                if is_loop:
                    self.handle_alt_loop(node, is_gather, rulename)
                else:
                    self.handle_alt_normal(node, is_gather, rulename)

            self.print("p->mark = _mark;")
            self.print(
                f"D(fprintf(stderr, \"%*c%s {rulename}[%d-%d]: %s failed!\\n\", p->level, ' ',\n"
                f' p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "{node_str}"));'
            )
            if "_cut_var" in var_types:
                self.print("if (_cut_var) {")
                with self.indent():
                    self.add_return("NULL")
                self.print("}")
        self.print("}")

    def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
        """Return a mapping of variable name to C type for the items of *node*."""
        types = {}
        with self.local_variable_context():
            for item in node.items:
                name, var_type = self.add_var(item)
                types[name] = var_type
        return types

    def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
        """Return the (de-duplicated name, C type) pair for one item.

        The item's own name and type take precedence over the ones carried
        by the generated call.
        """
        call = self.callmakervisitor.generate_call(node.item)
        name = node.name if node.name else call.assigned_variable
        if name is not None:
            name = self.dedupe(name)
        return_type = call.return_type if node.type is None else node.type
        return name, return_type