1# Copyright 2006 Google, Inc. All Rights Reserved. 2# Licensed to PSF under a Contributor Agreement. 3 4""" 5Python parse tree definitions. 6 7This is a very concrete parse tree; we need to keep every token and 8even the comments and whitespace between tokens. 9 10There's also a pattern matching implementation here. 11""" 12 13__author__ = "Guido van Rossum <guido@python.org>" 14 15import sys 16from io import StringIO 17 18HUGE = 0x7FFFFFFF # maximum repeat count, default max 19 20_type_reprs = {} 21def type_repr(type_num): 22 global _type_reprs 23 if not _type_reprs: 24 from .pygram import python_symbols 25 # printing tokens is possible but not as useful 26 # from .pgen2 import token // token.__dict__.items(): 27 for name, val in python_symbols.__dict__.items(): 28 if type(val) == int: _type_reprs[val] = name 29 return _type_reprs.setdefault(type_num, type_num) 30 31class Base(object): 32 33 """ 34 Abstract base class for Node and Leaf. 35 36 This provides some default functionality and boilerplate using the 37 template pattern. 38 39 A node may be a subnode of at most one parent. 40 """ 41 42 # Default values for instance variables 43 type = None # int: token number (< 256) or symbol number (>= 256) 44 parent = None # Parent node pointer, or None 45 children = () # Tuple of subnodes 46 was_changed = False 47 was_checked = False 48 49 def __new__(cls, *args, **kwds): 50 """Constructor that prevents Base from being instantiated.""" 51 assert cls is not Base, "Cannot instantiate Base" 52 return object.__new__(cls) 53 54 def __eq__(self, other): 55 """ 56 Compare two nodes for equality. 57 58 This calls the method _eq(). 59 """ 60 if self.__class__ is not other.__class__: 61 return NotImplemented 62 return self._eq(other) 63 64 __hash__ = None # For Py3 compatibility. 65 66 def _eq(self, other): 67 """ 68 Compare two nodes for equality. 69 70 This is called by __eq__ and __ne__. It is only called if the two nodes 71 have the same type. This must be implemented by the concrete subclass. 72 Nodes should be considered equal if they have the same structure, 73 ignoring the prefix string and other context information. 74 """ 75 raise NotImplementedError 76 77 def clone(self): 78 """ 79 Return a cloned (deep) copy of self. 80 81 This must be implemented by the concrete subclass. 82 """ 83 raise NotImplementedError 84 85 def post_order(self): 86 """ 87 Return a post-order iterator for the tree. 88 89 This must be implemented by the concrete subclass. 90 """ 91 raise NotImplementedError 92 93 def pre_order(self): 94 """ 95 Return a pre-order iterator for the tree. 96 97 This must be implemented by the concrete subclass. 98 """ 99 raise NotImplementedError 100 101 def replace(self, new): 102 """Replace this node with a new one in the parent.""" 103 assert self.parent is not None, str(self) 104 assert new is not None 105 if not isinstance(new, list): 106 new = [new] 107 l_children = [] 108 found = False 109 for ch in self.parent.children: 110 if ch is self: 111 assert not found, (self.parent.children, self, new) 112 if new is not None: 113 l_children.extend(new) 114 found = True 115 else: 116 l_children.append(ch) 117 assert found, (self.children, self, new) 118 self.parent.changed() 119 self.parent.children = l_children 120 for x in new: 121 x.parent = self.parent 122 self.parent = None 123 124 def get_lineno(self): 125 """Return the line number which generated the invocant node.""" 126 node = self 127 while not isinstance(node, Leaf): 128 if not node.children: 129 return 130 node = node.children[0] 131 return node.lineno 132 133 def changed(self): 134 if self.parent: 135 self.parent.changed() 136 self.was_changed = True 137 138 def remove(self): 139 """ 140 Remove the node from the tree. Returns the position of the node in its 141 parent's children before it was removed. 142 """ 143 if self.parent: 144 for i, node in enumerate(self.parent.children): 145 if node is self: 146 self.parent.changed() 147 del self.parent.children[i] 148 self.parent = None 149 return i 150 151 @property 152 def next_sibling(self): 153 """ 154 The node immediately following the invocant in their parent's children 155 list. If the invocant does not have a next sibling, it is None 156 """ 157 if self.parent is None: 158 return None 159 160 # Can't use index(); we need to test by identity 161 for i, child in enumerate(self.parent.children): 162 if child is self: 163 try: 164 return self.parent.children[i+1] 165 except IndexError: 166 return None 167 168 @property 169 def prev_sibling(self): 170 """ 171 The node immediately preceding the invocant in their parent's children 172 list. If the invocant does not have a previous sibling, it is None. 173 """ 174 if self.parent is None: 175 return None 176 177 # Can't use index(); we need to test by identity 178 for i, child in enumerate(self.parent.children): 179 if child is self: 180 if i == 0: 181 return None 182 return self.parent.children[i-1] 183 184 def leaves(self): 185 for child in self.children: 186 yield from child.leaves() 187 188 def depth(self): 189 if self.parent is None: 190 return 0 191 return 1 + self.parent.depth() 192 193 def get_suffix(self): 194 """ 195 Return the string immediately following the invocant node. This is 196 effectively equivalent to node.next_sibling.prefix 197 """ 198 next_sib = self.next_sibling 199 if next_sib is None: 200 return "" 201 return next_sib.prefix 202 203 if sys.version_info < (3, 0): 204 def __str__(self): 205 return str(self).encode("ascii") 206 207class Node(Base): 208 209 """Concrete implementation for interior nodes.""" 210 211 def __init__(self,type, children, 212 context=None, 213 prefix=None, 214 fixers_applied=None): 215 """ 216 Initializer. 217 218 Takes a type constant (a symbol number >= 256), a sequence of 219 child nodes, and an optional context keyword argument. 220 221 As a side effect, the parent pointers of the children are updated. 222 """ 223 assert type >= 256, type 224 self.type = type 225 self.children = list(children) 226 for ch in self.children: 227 assert ch.parent is None, repr(ch) 228 ch.parent = self 229 if prefix is not None: 230 self.prefix = prefix 231 if fixers_applied: 232 self.fixers_applied = fixers_applied[:] 233 else: 234 self.fixers_applied = None 235 236 def __repr__(self): 237 """Return a canonical string representation.""" 238 return "%s(%s, %r)" % (self.__class__.__name__, 239 type_repr(self.type), 240 self.children) 241 242 def __unicode__(self): 243 """ 244 Return a pretty string representation. 245 246 This reproduces the input source exactly. 247 """ 248 return "".join(map(str, self.children)) 249 250 if sys.version_info > (3, 0): 251 __str__ = __unicode__ 252 253 def _eq(self, other): 254 """Compare two nodes for equality.""" 255 return (self.type, self.children) == (other.type, other.children) 256 257 def clone(self): 258 """Return a cloned (deep) copy of self.""" 259 return Node(self.type, [ch.clone() for ch in self.children], 260 fixers_applied=self.fixers_applied) 261 262 def post_order(self): 263 """Return a post-order iterator for the tree.""" 264 for child in self.children: 265 yield from child.post_order() 266 yield self 267 268 def pre_order(self): 269 """Return a pre-order iterator for the tree.""" 270 yield self 271 for child in self.children: 272 yield from child.pre_order() 273 274 @property 275 def prefix(self): 276 """ 277 The whitespace and comments preceding this node in the input. 278 """ 279 if not self.children: 280 return "" 281 return self.children[0].prefix 282 283 @prefix.setter 284 def prefix(self, prefix): 285 if self.children: 286 self.children[0].prefix = prefix 287 288 def set_child(self, i, child): 289 """ 290 Equivalent to 'node.children[i] = child'. This method also sets the 291 child's parent attribute appropriately. 292 """ 293 child.parent = self 294 self.children[i].parent = None 295 self.children[i] = child 296 self.changed() 297 298 def insert_child(self, i, child): 299 """ 300 Equivalent to 'node.children.insert(i, child)'. This method also sets 301 the child's parent attribute appropriately. 302 """ 303 child.parent = self 304 self.children.insert(i, child) 305 self.changed() 306 307 def append_child(self, child): 308 """ 309 Equivalent to 'node.children.append(child)'. This method also sets the 310 child's parent attribute appropriately. 311 """ 312 child.parent = self 313 self.children.append(child) 314 self.changed() 315 316 317class Leaf(Base): 318 319 """Concrete implementation for leaf nodes.""" 320 321 # Default values for instance variables 322 _prefix = "" # Whitespace and comments preceding this token in the input 323 lineno = 0 # Line where this token starts in the input 324 column = 0 # Column where this token tarts in the input 325 326 def __init__(self, type, value, 327 context=None, 328 prefix=None, 329 fixers_applied=[]): 330 """ 331 Initializer. 332 333 Takes a type constant (a token number < 256), a string value, and an 334 optional context keyword argument. 335 """ 336 assert 0 <= type < 256, type 337 if context is not None: 338 self._prefix, (self.lineno, self.column) = context 339 self.type = type 340 self.value = value 341 if prefix is not None: 342 self._prefix = prefix 343 self.fixers_applied = fixers_applied[:] 344 345 def __repr__(self): 346 """Return a canonical string representation.""" 347 return "%s(%r, %r)" % (self.__class__.__name__, 348 self.type, 349 self.value) 350 351 def __unicode__(self): 352 """ 353 Return a pretty string representation. 354 355 This reproduces the input source exactly. 356 """ 357 return self.prefix + str(self.value) 358 359 if sys.version_info > (3, 0): 360 __str__ = __unicode__ 361 362 def _eq(self, other): 363 """Compare two nodes for equality.""" 364 return (self.type, self.value) == (other.type, other.value) 365 366 def clone(self): 367 """Return a cloned (deep) copy of self.""" 368 return Leaf(self.type, self.value, 369 (self.prefix, (self.lineno, self.column)), 370 fixers_applied=self.fixers_applied) 371 372 def leaves(self): 373 yield self 374 375 def post_order(self): 376 """Return a post-order iterator for the tree.""" 377 yield self 378 379 def pre_order(self): 380 """Return a pre-order iterator for the tree.""" 381 yield self 382 383 @property 384 def prefix(self): 385 """ 386 The whitespace and comments preceding this token in the input. 387 """ 388 return self._prefix 389 390 @prefix.setter 391 def prefix(self, prefix): 392 self.changed() 393 self._prefix = prefix 394 395def convert(gr, raw_node): 396 """ 397 Convert raw node information to a Node or Leaf instance. 398 399 This is passed to the parser driver which calls it whenever a reduction of a 400 grammar rule produces a new complete node, so that the tree is build 401 strictly bottom-up. 402 """ 403 type, value, context, children = raw_node 404 if children or type in gr.number2symbol: 405 # If there's exactly one child, return that child instead of 406 # creating a new node. 407 if len(children) == 1: 408 return children[0] 409 return Node(type, children, context=context) 410 else: 411 return Leaf(type, value, context=context) 412 413 414class BasePattern(object): 415 416 """ 417 A pattern is a tree matching pattern. 418 419 It looks for a specific node type (token or symbol), and 420 optionally for a specific content. 421 422 This is an abstract base class. There are three concrete 423 subclasses: 424 425 - LeafPattern matches a single leaf node; 426 - NodePattern matches a single node (usually non-leaf); 427 - WildcardPattern matches a sequence of nodes of variable length. 428 """ 429 430 # Defaults for instance variables 431 type = None # Node type (token if < 256, symbol if >= 256) 432 content = None # Optional content matching pattern 433 name = None # Optional name used to store match in results dict 434 435 def __new__(cls, *args, **kwds): 436 """Constructor that prevents BasePattern from being instantiated.""" 437 assert cls is not BasePattern, "Cannot instantiate BasePattern" 438 return object.__new__(cls) 439 440 def __repr__(self): 441 args = [type_repr(self.type), self.content, self.name] 442 while args and args[-1] is None: 443 del args[-1] 444 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args))) 445 446 def optimize(self): 447 """ 448 A subclass can define this as a hook for optimizations. 449 450 Returns either self or another node with the same effect. 451 """ 452 return self 453 454 def match(self, node, results=None): 455 """ 456 Does this pattern exactly match a node? 457 458 Returns True if it matches, False if not. 459 460 If results is not None, it must be a dict which will be 461 updated with the nodes matching named subpatterns. 462 463 Default implementation for non-wildcard patterns. 464 """ 465 if self.type is not None and node.type != self.type: 466 return False 467 if self.content is not None: 468 r = None 469 if results is not None: 470 r = {} 471 if not self._submatch(node, r): 472 return False 473 if r: 474 results.update(r) 475 if results is not None and self.name: 476 results[self.name] = node 477 return True 478 479 def match_seq(self, nodes, results=None): 480 """ 481 Does this pattern exactly match a sequence of nodes? 482 483 Default implementation for non-wildcard patterns. 484 """ 485 if len(nodes) != 1: 486 return False 487 return self.match(nodes[0], results) 488 489 def generate_matches(self, nodes): 490 """ 491 Generator yielding all matches for this pattern. 492 493 Default implementation for non-wildcard patterns. 494 """ 495 r = {} 496 if nodes and self.match(nodes[0], r): 497 yield 1, r 498 499 500class LeafPattern(BasePattern): 501 502 def __init__(self, type=None, content=None, name=None): 503 """ 504 Initializer. Takes optional type, content, and name. 505 506 The type, if given must be a token type (< 256). If not given, 507 this matches any *leaf* node; the content may still be required. 508 509 The content, if given, must be a string. 510 511 If a name is given, the matching node is stored in the results 512 dict under that key. 513 """ 514 if type is not None: 515 assert 0 <= type < 256, type 516 if content is not None: 517 assert isinstance(content, str), repr(content) 518 self.type = type 519 self.content = content 520 self.name = name 521 522 def match(self, node, results=None): 523 """Override match() to insist on a leaf node.""" 524 if not isinstance(node, Leaf): 525 return False 526 return BasePattern.match(self, node, results) 527 528 def _submatch(self, node, results=None): 529 """ 530 Match the pattern's content to the node's children. 531 532 This assumes the node type matches and self.content is not None. 533 534 Returns True if it matches, False if not. 535 536 If results is not None, it must be a dict which will be 537 updated with the nodes matching named subpatterns. 538 539 When returning False, the results dict may still be updated. 540 """ 541 return self.content == node.value 542 543 544class NodePattern(BasePattern): 545 546 wildcards = False 547 548 def __init__(self, type=None, content=None, name=None): 549 """ 550 Initializer. Takes optional type, content, and name. 551 552 The type, if given, must be a symbol type (>= 256). If the 553 type is None this matches *any* single node (leaf or not), 554 except if content is not None, in which it only matches 555 non-leaf nodes that also match the content pattern. 556 557 The content, if not None, must be a sequence of Patterns that 558 must match the node's children exactly. If the content is 559 given, the type must not be None. 560 561 If a name is given, the matching node is stored in the results 562 dict under that key. 563 """ 564 if type is not None: 565 assert type >= 256, type 566 if content is not None: 567 assert not isinstance(content, str), repr(content) 568 content = list(content) 569 for i, item in enumerate(content): 570 assert isinstance(item, BasePattern), (i, item) 571 if isinstance(item, WildcardPattern): 572 self.wildcards = True 573 self.type = type 574 self.content = content 575 self.name = name 576 577 def _submatch(self, node, results=None): 578 """ 579 Match the pattern's content to the node's children. 580 581 This assumes the node type matches and self.content is not None. 582 583 Returns True if it matches, False if not. 584 585 If results is not None, it must be a dict which will be 586 updated with the nodes matching named subpatterns. 587 588 When returning False, the results dict may still be updated. 589 """ 590 if self.wildcards: 591 for c, r in generate_matches(self.content, node.children): 592 if c == len(node.children): 593 if results is not None: 594 results.update(r) 595 return True 596 return False 597 if len(self.content) != len(node.children): 598 return False 599 for subpattern, child in zip(self.content, node.children): 600 if not subpattern.match(child, results): 601 return False 602 return True 603 604 605class WildcardPattern(BasePattern): 606 607 """ 608 A wildcard pattern can match zero or more nodes. 609 610 This has all the flexibility needed to implement patterns like: 611 612 .* .+ .? .{m,n} 613 (a b c | d e | f) 614 (...)* (...)+ (...)? (...){m,n} 615 616 except it always uses non-greedy matching. 617 """ 618 619 def __init__(self, content=None, min=0, max=HUGE, name=None): 620 """ 621 Initializer. 622 623 Args: 624 content: optional sequence of subsequences of patterns; 625 if absent, matches one node; 626 if present, each subsequence is an alternative [*] 627 min: optional minimum number of times to match, default 0 628 max: optional maximum number of times to match, default HUGE 629 name: optional name assigned to this match 630 631 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is 632 equivalent to (a b c | d e | f g h); if content is None, 633 this is equivalent to '.' in regular expression terms. 634 The min and max parameters work as follows: 635 min=0, max=maxint: .* 636 min=1, max=maxint: .+ 637 min=0, max=1: .? 638 min=1, max=1: . 639 If content is not None, replace the dot with the parenthesized 640 list of alternatives, e.g. (a b c | d e | f g h)* 641 """ 642 assert 0 <= min <= max <= HUGE, (min, max) 643 if content is not None: 644 content = tuple(map(tuple, content)) # Protect against alterations 645 # Check sanity of alternatives 646 assert len(content), repr(content) # Can't have zero alternatives 647 for alt in content: 648 assert len(alt), repr(alt) # Can have empty alternatives 649 self.content = content 650 self.min = min 651 self.max = max 652 self.name = name 653 654 def optimize(self): 655 """Optimize certain stacked wildcard patterns.""" 656 subpattern = None 657 if (self.content is not None and 658 len(self.content) == 1 and len(self.content[0]) == 1): 659 subpattern = self.content[0][0] 660 if self.min == 1 and self.max == 1: 661 if self.content is None: 662 return NodePattern(name=self.name) 663 if subpattern is not None and self.name == subpattern.name: 664 return subpattern.optimize() 665 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and 666 subpattern.min <= 1 and self.name == subpattern.name): 667 return WildcardPattern(subpattern.content, 668 self.min*subpattern.min, 669 self.max*subpattern.max, 670 subpattern.name) 671 return self 672 673 def match(self, node, results=None): 674 """Does this pattern exactly match a node?""" 675 return self.match_seq([node], results) 676 677 def match_seq(self, nodes, results=None): 678 """Does this pattern exactly match a sequence of nodes?""" 679 for c, r in self.generate_matches(nodes): 680 if c == len(nodes): 681 if results is not None: 682 results.update(r) 683 if self.name: 684 results[self.name] = list(nodes) 685 return True 686 return False 687 688 def generate_matches(self, nodes): 689 """ 690 Generator yielding matches for a sequence of nodes. 691 692 Args: 693 nodes: sequence of nodes 694 695 Yields: 696 (count, results) tuples where: 697 count: the match comprises nodes[:count]; 698 results: dict containing named submatches. 699 """ 700 if self.content is None: 701 # Shortcut for special case (see __init__.__doc__) 702 for count in range(self.min, 1 + min(len(nodes), self.max)): 703 r = {} 704 if self.name: 705 r[self.name] = nodes[:count] 706 yield count, r 707 elif self.name == "bare_name": 708 yield self._bare_name_matches(nodes) 709 else: 710 # The reason for this is that hitting the recursion limit usually 711 # results in some ugly messages about how RuntimeErrors are being 712 # ignored. We only have to do this on CPython, though, because other 713 # implementations don't have this nasty bug in the first place. 714 if hasattr(sys, "getrefcount"): 715 save_stderr = sys.stderr 716 sys.stderr = StringIO() 717 try: 718 for count, r in self._recursive_matches(nodes, 0): 719 if self.name: 720 r[self.name] = nodes[:count] 721 yield count, r 722 except RuntimeError: 723 # We fall back to the iterative pattern matching scheme if the recursive 724 # scheme hits the recursion limit. 725 for count, r in self._iterative_matches(nodes): 726 if self.name: 727 r[self.name] = nodes[:count] 728 yield count, r 729 finally: 730 if hasattr(sys, "getrefcount"): 731 sys.stderr = save_stderr 732 733 def _iterative_matches(self, nodes): 734 """Helper to iteratively yield the matches.""" 735 nodelen = len(nodes) 736 if 0 >= self.min: 737 yield 0, {} 738 739 results = [] 740 # generate matches that use just one alt from self.content 741 for alt in self.content: 742 for c, r in generate_matches(alt, nodes): 743 yield c, r 744 results.append((c, r)) 745 746 # for each match, iterate down the nodes 747 while results: 748 new_results = [] 749 for c0, r0 in results: 750 # stop if the entire set of nodes has been matched 751 if c0 < nodelen and c0 <= self.max: 752 for alt in self.content: 753 for c1, r1 in generate_matches(alt, nodes[c0:]): 754 if c1 > 0: 755 r = {} 756 r.update(r0) 757 r.update(r1) 758 yield c0 + c1, r 759 new_results.append((c0 + c1, r)) 760 results = new_results 761 762 def _bare_name_matches(self, nodes): 763 """Special optimized matcher for bare_name.""" 764 count = 0 765 r = {} 766 done = False 767 max = len(nodes) 768 while not done and count < max: 769 done = True 770 for leaf in self.content: 771 if leaf[0].match(nodes[count], r): 772 count += 1 773 done = False 774 break 775 r[self.name] = nodes[:count] 776 return count, r 777 778 def _recursive_matches(self, nodes, count): 779 """Helper to recursively yield the matches.""" 780 assert self.content is not None 781 if count >= self.min: 782 yield 0, {} 783 if count < self.max: 784 for alt in self.content: 785 for c0, r0 in generate_matches(alt, nodes): 786 for c1, r1 in self._recursive_matches(nodes[c0:], count+1): 787 r = {} 788 r.update(r0) 789 r.update(r1) 790 yield c0 + c1, r 791 792 793class NegatedPattern(BasePattern): 794 795 def __init__(self, content=None): 796 """ 797 Initializer. 798 799 The argument is either a pattern or None. If it is None, this 800 only matches an empty sequence (effectively '$' in regex 801 lingo). If it is not None, this matches whenever the argument 802 pattern doesn't have any matches. 803 """ 804 if content is not None: 805 assert isinstance(content, BasePattern), repr(content) 806 self.content = content 807 808 def match(self, node): 809 # We never match a node in its entirety 810 return False 811 812 def match_seq(self, nodes): 813 # We only match an empty sequence of nodes in its entirety 814 return len(nodes) == 0 815 816 def generate_matches(self, nodes): 817 if self.content is None: 818 # Return a match if there is an empty sequence 819 if len(nodes) == 0: 820 yield 0, {} 821 else: 822 # Return a match if the argument pattern has no matches 823 for c, r in self.content.generate_matches(nodes): 824 return 825 yield 0, {} 826 827 828def generate_matches(patterns, nodes): 829 """ 830 Generator yielding matches for a sequence of patterns and nodes. 831 832 Args: 833 patterns: a sequence of patterns 834 nodes: a sequence of nodes 835 836 Yields: 837 (count, results) tuples where: 838 count: the entire sequence of patterns matches nodes[:count]; 839 results: dict containing named submatches. 840 """ 841 if not patterns: 842 yield 0, {} 843 else: 844 p, rest = patterns[0], patterns[1:] 845 for c0, r0 in p.generate_matches(nodes): 846 if not rest: 847 yield c0, r0 848 else: 849 for c1, r1 in generate_matches(rest, nodes[c0:]): 850 r = {} 851 r.update(r0) 852 r.update(r1) 853 yield c0 + c1, r 854