1from __future__ import annotations 2 3from abc import abstractmethod 4from typing import ( 5 AbstractSet, 6 Any, 7 Dict, 8 Iterable, 9 Iterator, 10 List, 11 Optional, 12 Set, 13 Tuple, 14 TYPE_CHECKING, 15 Union, 16) 17 18 19if TYPE_CHECKING: 20 from pegen.parser_generator import ParserGenerator 21 22 23class GrammarError(Exception): 24 pass 25 26 27class GrammarVisitor: 28 def visit(self, node: Any, *args: Any, **kwargs: Any) -> Any: 29 """Visit a node.""" 30 method = "visit_" + node.__class__.__name__ 31 visitor = getattr(self, method, self.generic_visit) 32 return visitor(node, *args, **kwargs) 33 34 def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> None: 35 """Called if no explicit visitor function exists for a node.""" 36 for value in node: 37 if isinstance(value, list): 38 for item in value: 39 self.visit(item, *args, **kwargs) 40 else: 41 self.visit(value, *args, **kwargs) 42 43 44class Grammar: 45 def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]): 46 self.rules = {rule.name: rule for rule in rules} 47 self.metas = dict(metas) 48 49 def __str__(self) -> str: 50 return "\n".join(str(rule) for name, rule in self.rules.items()) 51 52 def __repr__(self) -> str: 53 lines = ["Grammar("] 54 lines.append(" [") 55 for rule in self.rules.values(): 56 lines.append(f" {repr(rule)},") 57 lines.append(" ],") 58 lines.append(" {repr(list(self.metas.items()))}") 59 lines.append(")") 60 return "\n".join(lines) 61 62 def __iter__(self) -> Iterator[Rule]: 63 yield from self.rules.values() 64 65 66# Global flag whether we want actions in __str__() -- default off. 67SIMPLE_STR = True 68 69 70class Rule: 71 def __init__(self, name: str, type: Optional[str], rhs: Rhs, memo: Optional[object] = None): 72 self.name = name 73 self.type = type 74 self.rhs = rhs 75 self.memo = bool(memo) 76 self.visited = False 77 self.nullable = False 78 self.left_recursive = False 79 self.leader = False 80 81 def is_loop(self) -> bool: 82 return self.name.startswith("_loop") 83 84 def is_gather(self) -> bool: 85 return self.name.startswith("_gather") 86 87 def __str__(self) -> str: 88 if SIMPLE_STR or self.type is None: 89 res = f"{self.name}: {self.rhs}" 90 else: 91 res = f"{self.name}[{self.type}]: {self.rhs}" 92 if len(res) < 88: 93 return res 94 lines = [res.split(":")[0] + ":"] 95 lines += [f" | {alt}" for alt in self.rhs.alts] 96 return "\n".join(lines) 97 98 def __repr__(self) -> str: 99 return f"Rule({self.name!r}, {self.type!r}, {self.rhs!r})" 100 101 def __iter__(self) -> Iterator[Rhs]: 102 yield self.rhs 103 104 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 105 if self.visited: 106 # A left-recursive rule is considered non-nullable. 107 return False 108 self.visited = True 109 self.nullable = self.rhs.nullable_visit(rules) 110 return self.nullable 111 112 def initial_names(self) -> AbstractSet[str]: 113 return self.rhs.initial_names() 114 115 def flatten(self) -> Rhs: 116 # If it's a single parenthesized group, flatten it. 117 rhs = self.rhs 118 if ( 119 not self.is_loop() 120 and len(rhs.alts) == 1 121 and len(rhs.alts[0].items) == 1 122 and isinstance(rhs.alts[0].items[0].item, Group) 123 ): 124 rhs = rhs.alts[0].items[0].item.rhs 125 return rhs 126 127 def collect_todo(self, gen: ParserGenerator) -> None: 128 rhs = self.flatten() 129 rhs.collect_todo(gen) 130 131 132class Leaf: 133 def __init__(self, value: str): 134 self.value = value 135 136 def __str__(self) -> str: 137 return self.value 138 139 def __iter__(self) -> Iterable[str]: 140 if False: 141 yield 142 143 @abstractmethod 144 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 145 raise NotImplementedError 146 147 @abstractmethod 148 def initial_names(self) -> AbstractSet[str]: 149 raise NotImplementedError 150 151 152class NameLeaf(Leaf): 153 """The value is the name.""" 154 155 def __str__(self) -> str: 156 if self.value == "ENDMARKER": 157 return "$" 158 return super().__str__() 159 160 def __repr__(self) -> str: 161 return f"NameLeaf({self.value!r})" 162 163 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 164 if self.value in rules: 165 return rules[self.value].nullable_visit(rules) 166 # Token or unknown; never empty. 167 return False 168 169 def initial_names(self) -> AbstractSet[str]: 170 return {self.value} 171 172 173class StringLeaf(Leaf): 174 """The value is a string literal, including quotes.""" 175 176 def __repr__(self) -> str: 177 return f"StringLeaf({self.value!r})" 178 179 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 180 # The string token '' is considered empty. 181 return not self.value 182 183 def initial_names(self) -> AbstractSet[str]: 184 return set() 185 186 187class Rhs: 188 def __init__(self, alts: List[Alt]): 189 self.alts = alts 190 self.memo: Optional[Tuple[Optional[str], str]] = None 191 192 def __str__(self) -> str: 193 return " | ".join(str(alt) for alt in self.alts) 194 195 def __repr__(self) -> str: 196 return f"Rhs({self.alts!r})" 197 198 def __iter__(self) -> Iterator[List[Alt]]: 199 yield self.alts 200 201 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 202 for alt in self.alts: 203 if alt.nullable_visit(rules): 204 return True 205 return False 206 207 def initial_names(self) -> AbstractSet[str]: 208 names: Set[str] = set() 209 for alt in self.alts: 210 names |= alt.initial_names() 211 return names 212 213 def collect_todo(self, gen: ParserGenerator) -> None: 214 for alt in self.alts: 215 alt.collect_todo(gen) 216 217 218class Alt: 219 def __init__(self, items: List[NamedItem], *, icut: int = -1, action: Optional[str] = None): 220 self.items = items 221 self.icut = icut 222 self.action = action 223 224 def __str__(self) -> str: 225 core = " ".join(str(item) for item in self.items) 226 if not SIMPLE_STR and self.action: 227 return f"{core} {{ {self.action} }}" 228 else: 229 return core 230 231 def __repr__(self) -> str: 232 args = [repr(self.items)] 233 if self.icut >= 0: 234 args.append(f"icut={self.icut}") 235 if self.action: 236 args.append(f"action={self.action!r}") 237 return f"Alt({', '.join(args)})" 238 239 def __iter__(self) -> Iterator[List[NamedItem]]: 240 yield self.items 241 242 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 243 for item in self.items: 244 if not item.nullable_visit(rules): 245 return False 246 return True 247 248 def initial_names(self) -> AbstractSet[str]: 249 names: Set[str] = set() 250 for item in self.items: 251 names |= item.initial_names() 252 if not item.nullable: 253 break 254 return names 255 256 def collect_todo(self, gen: ParserGenerator) -> None: 257 for item in self.items: 258 item.collect_todo(gen) 259 260 261class NamedItem: 262 def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None): 263 self.name = name 264 self.item = item 265 self.type = type 266 self.nullable = False 267 268 def __str__(self) -> str: 269 if not SIMPLE_STR and self.name: 270 return f"{self.name}={self.item}" 271 else: 272 return str(self.item) 273 274 def __repr__(self) -> str: 275 return f"NamedItem({self.name!r}, {self.item!r})" 276 277 def __iter__(self) -> Iterator[Item]: 278 yield self.item 279 280 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 281 self.nullable = self.item.nullable_visit(rules) 282 return self.nullable 283 284 def initial_names(self) -> AbstractSet[str]: 285 return self.item.initial_names() 286 287 def collect_todo(self, gen: ParserGenerator) -> None: 288 gen.callmakervisitor.visit(self.item) 289 290 291class Forced: 292 def __init__(self, node: Plain): 293 self.node = node 294 295 def __str__(self) -> str: 296 return f"&&{self.node}" 297 298 def __iter__(self) -> Iterator[Plain]: 299 yield self.node 300 301 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 302 return True 303 304 def initial_names(self) -> AbstractSet[str]: 305 return set() 306 307 308class Lookahead: 309 def __init__(self, node: Plain, sign: str): 310 self.node = node 311 self.sign = sign 312 313 def __str__(self) -> str: 314 return f"{self.sign}{self.node}" 315 316 def __iter__(self) -> Iterator[Plain]: 317 yield self.node 318 319 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 320 return True 321 322 def initial_names(self) -> AbstractSet[str]: 323 return set() 324 325 326class PositiveLookahead(Lookahead): 327 def __init__(self, node: Plain): 328 super().__init__(node, "&") 329 330 def __repr__(self) -> str: 331 return f"PositiveLookahead({self.node!r})" 332 333 334class NegativeLookahead(Lookahead): 335 def __init__(self, node: Plain): 336 super().__init__(node, "!") 337 338 def __repr__(self) -> str: 339 return f"NegativeLookahead({self.node!r})" 340 341 342class Opt: 343 def __init__(self, node: Item): 344 self.node = node 345 346 def __str__(self) -> str: 347 s = str(self.node) 348 # TODO: Decide whether to use [X] or X? based on type of X 349 if " " in s: 350 return f"[{s}]" 351 else: 352 return f"{s}?" 353 354 def __repr__(self) -> str: 355 return f"Opt({self.node!r})" 356 357 def __iter__(self) -> Iterator[Item]: 358 yield self.node 359 360 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 361 return True 362 363 def initial_names(self) -> AbstractSet[str]: 364 return self.node.initial_names() 365 366 367class Repeat: 368 """Shared base class for x* and x+.""" 369 370 def __init__(self, node: Plain): 371 self.node = node 372 self.memo: Optional[Tuple[Optional[str], str]] = None 373 374 @abstractmethod 375 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 376 raise NotImplementedError 377 378 def __iter__(self) -> Iterator[Plain]: 379 yield self.node 380 381 def initial_names(self) -> AbstractSet[str]: 382 return self.node.initial_names() 383 384 385class Repeat0(Repeat): 386 def __str__(self) -> str: 387 s = str(self.node) 388 # TODO: Decide whether to use (X)* or X* based on type of X 389 if " " in s: 390 return f"({s})*" 391 else: 392 return f"{s}*" 393 394 def __repr__(self) -> str: 395 return f"Repeat0({self.node!r})" 396 397 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 398 return True 399 400 401class Repeat1(Repeat): 402 def __str__(self) -> str: 403 s = str(self.node) 404 # TODO: Decide whether to use (X)+ or X+ based on type of X 405 if " " in s: 406 return f"({s})+" 407 else: 408 return f"{s}+" 409 410 def __repr__(self) -> str: 411 return f"Repeat1({self.node!r})" 412 413 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 414 return False 415 416 417class Gather(Repeat): 418 def __init__(self, separator: Plain, node: Plain): 419 self.separator = separator 420 self.node = node 421 422 def __str__(self) -> str: 423 return f"{self.separator!s}.{self.node!s}+" 424 425 def __repr__(self) -> str: 426 return f"Gather({self.separator!r}, {self.node!r})" 427 428 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 429 return False 430 431 432class Group: 433 def __init__(self, rhs: Rhs): 434 self.rhs = rhs 435 436 def __str__(self) -> str: 437 return f"({self.rhs})" 438 439 def __repr__(self) -> str: 440 return f"Group({self.rhs!r})" 441 442 def __iter__(self) -> Iterator[Rhs]: 443 yield self.rhs 444 445 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 446 return self.rhs.nullable_visit(rules) 447 448 def initial_names(self) -> AbstractSet[str]: 449 return self.rhs.initial_names() 450 451 452class Cut: 453 def __init__(self) -> None: 454 pass 455 456 def __repr__(self) -> str: 457 return f"Cut()" 458 459 def __str__(self) -> str: 460 return f"~" 461 462 def __iter__(self) -> Iterator[Tuple[str, str]]: 463 if False: 464 yield 465 466 def __eq__(self, other: object) -> bool: 467 if not isinstance(other, Cut): 468 return NotImplemented 469 return True 470 471 def nullable_visit(self, rules: Dict[str, Rule]) -> bool: 472 return True 473 474 def initial_names(self) -> AbstractSet[str]: 475 return set() 476 477 478Plain = Union[Leaf, Group] 479Item = Union[Plain, Opt, Repeat, Forced, Lookahead, Rhs, Cut] 480RuleName = Tuple[str, str] 481MetaTuple = Tuple[str, Optional[str]] 482MetaList = List[MetaTuple] 483RuleList = List[Rule] 484NamedItemList = List[NamedItem] 485LookaheadOrCut = Union[Lookahead, Cut] 486