• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3"""
4Python parse tree definitions.
5
6This is a very concrete parse tree; we need to keep every token and
7even the comments and whitespace between tokens.
8
9There's also a pattern matching implementation here.
10"""
11
12__author__ = 'Guido van Rossum <guido@python.org>'
13
14import sys
15from io import StringIO
16from typing import List
17from typing import Optional
18from typing import Text
19from typing import Tuple
20from typing import Union
21
22HUGE = 0x7FFFFFFF  # maximum repeat count, default max
23
24_type_reprs = {}
25
26
27def type_repr(type_num):
28  global _type_reprs
29  if not _type_reprs:
30    from .pygram import python_symbols
31
32    # printing tokens is possible but not as useful
33    # from .pgen2 import token // token.__dict__.items():
34    for name, val in python_symbols.__dict__.items():
35      if isinstance(val, int):
36        _type_reprs[val] = name
37  return _type_reprs.setdefault(type_num, type_num)
38
39
40NL = Union['Node', 'Leaf']
41Context = Tuple[Text, Tuple[int, int]]
42RawNode = Tuple[int, Optional[Text], Optional[Context], Optional[List[NL]]]
43
44
45class Base(object):
46  """
47    Abstract base class for Node and Leaf.
48
49    This provides some default functionality and boilerplate using the
50    template pattern.
51
52    A node may be a subnode of at most one parent.
53    """
54
55  # Default values for instance variables
56  type = None  # int: token number (< 256) or symbol number (>= 256)
57  parent = None  # Parent node pointer, or None
58  children = ()  # Tuple of subnodes
59  was_changed = False
60  was_checked = False
61
62  def __new__(cls, *args, **kwds):
63    """Constructor that prevents Base from being instantiated."""
64    assert cls is not Base, 'Cannot instantiate Base'
65    return object.__new__(cls)
66
67  def __eq__(self, other):
68    """
69        Compare two nodes for equality.
70
71        This calls the method _eq().
72        """
73    if self.__class__ is not other.__class__:
74      return NotImplemented
75    return self._eq(other)
76
77  __hash__ = None  # For Py3 compatibility.
78
79  def _eq(self, other):
80    """
81        Compare two nodes for equality.
82
83        This is called by __eq__ and __ne__.  It is only called if the two nodes
84        have the same type.  This must be implemented by the concrete subclass.
85        Nodes should be considered equal if they have the same structure,
86        ignoring the prefix string and other context information.
87        """
88    raise NotImplementedError
89
90  def clone(self):
91    """
92        Return a cloned (deep) copy of self.
93
94        This must be implemented by the concrete subclass.
95        """
96    raise NotImplementedError
97
98  def post_order(self):
99    """
100        Return a post-order iterator for the tree.
101
102        This must be implemented by the concrete subclass.
103        """
104    raise NotImplementedError
105
106  def pre_order(self):
107    """
108        Return a pre-order iterator for the tree.
109
110        This must be implemented by the concrete subclass.
111        """
112    raise NotImplementedError
113
114  def replace(self, new):
115    """Replace this node with a new one in the parent."""
116    assert self.parent is not None, str(self)
117    assert new is not None
118    if not isinstance(new, list):
119      new = [new]
120    l_children = []
121    found = False
122    for ch in self.parent.children:
123      if ch is self:
124        assert not found, (self.parent.children, self, new)
125        if new is not None:
126          l_children.extend(new)
127        found = True
128      else:
129        l_children.append(ch)
130    assert found, (self.children, self, new)
131    self.parent.changed()
132    self.parent.children = l_children
133    for x in new:
134      x.parent = self.parent
135    self.parent = None
136
137  def get_lineno(self):
138    """Return the line number which generated the invocant node."""
139    node = self
140    while not isinstance(node, Leaf):
141      if not node.children:
142        return
143      node = node.children[0]
144    return node.lineno
145
146  def changed(self):
147    if self.parent:
148      self.parent.changed()
149    self.was_changed = True
150
151  def remove(self):
152    """
153        Remove the node from the tree. Returns the position of the node in its
154        parent's children before it was removed.
155        """
156    if self.parent:
157      for i, node in enumerate(self.parent.children):
158        if node is self:
159          self.parent.changed()
160          del self.parent.children[i]
161          self.parent = None
162          return i
163
164  @property
165  def next_sibling(self):
166    """
167        The node immediately following the invocant in their parent's children
168        list. If the invocant does not have a next sibling, it is None
169        """
170    if self.parent is None:
171      return None
172
173    # Can't use index(); we need to test by identity
174    for i, child in enumerate(self.parent.children):
175      if child is self:
176        try:
177          return self.parent.children[i + 1]
178        except IndexError:
179          return None
180
181  @property
182  def prev_sibling(self):
183    """
184        The node immediately preceding the invocant in their parent's children
185        list. If the invocant does not have a previous sibling, it is None.
186        """
187    if self.parent is None:
188      return None
189
190    # Can't use index(); we need to test by identity
191    for i, child in enumerate(self.parent.children):
192      if child is self:
193        if i == 0:
194          return None
195        return self.parent.children[i - 1]
196
197  def leaves(self):
198    for child in self.children:
199      yield from child.leaves()
200
201  def depth(self):
202    if self.parent is None:
203      return 0
204    return 1 + self.parent.depth()
205
206  def get_suffix(self):
207    """
208        Return the string immediately following the invocant node. This is
209        effectively equivalent to node.next_sibling.prefix
210        """
211    next_sib = self.next_sibling
212    if next_sib is None:
213      return ''
214    return next_sib.prefix
215
216  if sys.version_info < (3, 0):
217
218    def __str__(self):
219      return str(self).encode('ascii')
220
221
222class Node(Base):
223  """Concrete implementation for interior nodes."""
224
225  def __init__(self,
226               type,
227               children,
228               context=None,
229               prefix=None,
230               fixers_applied=None):
231    """
232        Initializer.
233
234        Takes a type constant (a symbol number >= 256), a sequence of
235        child nodes, and an optional context keyword argument.
236
237        As a side effect, the parent pointers of the children are updated.
238        """
239    assert type >= 256, type
240    self.type = type
241    self.children = list(children)
242    for ch in self.children:
243      assert ch.parent is None, repr(ch)
244      ch.parent = self
245    if prefix is not None:
246      self.prefix = prefix
247    if fixers_applied:
248      self.fixers_applied = fixers_applied[:]
249    else:
250      self.fixers_applied = None
251
252  def __repr__(self):
253    """Return a canonical string representation."""
254    return '%s(%s, %r)' % (self.__class__.__name__, type_repr(
255        self.type), self.children)
256
257  def __unicode__(self):
258    """
259        Return a pretty string representation.
260
261        This reproduces the input source exactly.
262        """
263    return ''.join(map(str, self.children))
264
265  if sys.version_info > (3, 0):
266    __str__ = __unicode__
267
268  def _eq(self, other):
269    """Compare two nodes for equality."""
270    return (self.type, self.children) == (other.type, other.children)
271
272  def clone(self):
273    """Return a cloned (deep) copy of self."""
274    return Node(
275        self.type, [ch.clone() for ch in self.children],
276        fixers_applied=self.fixers_applied)
277
278  def post_order(self):
279    """Return a post-order iterator for the tree."""
280    for child in self.children:
281      yield from child.post_order()
282    yield self
283
284  def pre_order(self):
285    """Return a pre-order iterator for the tree."""
286    yield self
287    for child in self.children:
288      yield from child.pre_order()
289
290  @property
291  def prefix(self):
292    """
293        The whitespace and comments preceding this node in the input.
294        """
295    if not self.children:
296      return ''
297    return self.children[0].prefix
298
299  @prefix.setter
300  def prefix(self, prefix):
301    if self.children:
302      self.children[0].prefix = prefix
303
304  def set_child(self, i, child):
305    """
306        Equivalent to 'node.children[i] = child'. This method also sets the
307        child's parent attribute appropriately.
308        """
309    child.parent = self
310    self.children[i].parent = None
311    self.children[i] = child
312    self.changed()
313
314  def insert_child(self, i, child):
315    """
316        Equivalent to 'node.children.insert(i, child)'. This method also sets
317        the child's parent attribute appropriately.
318        """
319    child.parent = self
320    self.children.insert(i, child)
321    self.changed()
322
323  def append_child(self, child):
324    """
325        Equivalent to 'node.children.append(child)'. This method also sets the
326        child's parent attribute appropriately.
327        """
328    child.parent = self
329    self.children.append(child)
330    self.changed()
331
332
333class Leaf(Base):
334  """Concrete implementation for leaf nodes."""
335
336  # Default values for instance variables
337  _prefix = ''  # Whitespace and comments preceding this token in the input
338  lineno = 0  # Line where this token starts in the input
339  column = 0  # Column where this token tarts in the input
340
341  def __init__(self, type, value, context=None, prefix=None, fixers_applied=[]):
342    """
343        Initializer.
344
345        Takes a type constant (a token number < 256), a string value, and an
346        optional context keyword argument.
347        """
348    assert 0 <= type < 256, type
349    if context is not None:
350      self._prefix, (self.lineno, self.column) = context
351    self.type = type
352    self.value = value
353    if prefix is not None:
354      self._prefix = prefix
355    self.fixers_applied = fixers_applied[:]
356
357  def __repr__(self):
358    """Return a canonical string representation."""
359    return '%s(%r, %r)' % (self.__class__.__name__, self.type, self.value)
360
361  def __unicode__(self):
362    """
363        Return a pretty string representation.
364
365        This reproduces the input source exactly.
366        """
367    return self.prefix + str(self.value)
368
369  if sys.version_info > (3, 0):
370    __str__ = __unicode__
371
372  def _eq(self, other):
373    """Compare two nodes for equality."""
374    return (self.type, self.value) == (other.type, other.value)
375
376  def clone(self):
377    """Return a cloned (deep) copy of self."""
378    return Leaf(
379        self.type,
380        self.value, (self.prefix, (self.lineno, self.column)),
381        fixers_applied=self.fixers_applied)
382
383  def leaves(self):
384    yield self
385
386  def post_order(self):
387    """Return a post-order iterator for the tree."""
388    yield self
389
390  def pre_order(self):
391    """Return a pre-order iterator for the tree."""
392    yield self
393
394  @property
395  def prefix(self):
396    """
397        The whitespace and comments preceding this token in the input.
398        """
399    return self._prefix
400
401  @prefix.setter
402  def prefix(self, prefix):
403    self.changed()
404    self._prefix = prefix
405
406
407def convert(gr, raw_node):
408  """
409    Convert raw node information to a Node or Leaf instance.
410
411    This is passed to the parser driver which calls it whenever a reduction of a
412    grammar rule produces a new complete node, so that the tree is build
413    strictly bottom-up.
414    """
415  type, value, context, children = raw_node
416  if children or type in gr.number2symbol:
417    # If there's exactly one child, return that child instead of
418    # creating a new node.
419    if len(children) == 1:
420      return children[0]
421    return Node(type, children, context=context)
422  else:
423    return Leaf(type, value, context=context)
424
425
426class BasePattern(object):
427  """
428    A pattern is a tree matching pattern.
429
430    It looks for a specific node type (token or symbol), and
431    optionally for a specific content.
432
433    This is an abstract base class.  There are three concrete
434    subclasses:
435
436    - LeafPattern matches a single leaf node;
437    - NodePattern matches a single node (usually non-leaf);
438    - WildcardPattern matches a sequence of nodes of variable length.
439    """
440
441  # Defaults for instance variables
442  type = None  # Node type (token if < 256, symbol if >= 256)
443  content = None  # Optional content matching pattern
444  name = None  # Optional name used to store match in results dict
445
446  def __new__(cls, *args, **kwds):
447    """Constructor that prevents BasePattern from being instantiated."""
448    assert cls is not BasePattern, 'Cannot instantiate BasePattern'
449    return object.__new__(cls)
450
451  def __repr__(self):
452    args = [type_repr(self.type), self.content, self.name]
453    while args and args[-1] is None:
454      del args[-1]
455    return '%s(%s)' % (self.__class__.__name__, ', '.join(map(repr, args)))
456
457  def optimize(self):
458    """
459        A subclass can define this as a hook for optimizations.
460
461        Returns either self or another node with the same effect.
462        """
463    return self
464
465  def match(self, node, results=None):
466    """
467        Does this pattern exactly match a node?
468
469        Returns True if it matches, False if not.
470
471        If results is not None, it must be a dict which will be
472        updated with the nodes matching named subpatterns.
473
474        Default implementation for non-wildcard patterns.
475        """
476    if self.type is not None and node.type != self.type:
477      return False
478    if self.content is not None:
479      r = None
480      if results is not None:
481        r = {}
482      if not self._submatch(node, r):
483        return False
484      if r:
485        results.update(r)
486    if results is not None and self.name:
487      results[self.name] = node
488    return True
489
490  def match_seq(self, nodes, results=None):
491    """
492        Does this pattern exactly match a sequence of nodes?
493
494        Default implementation for non-wildcard patterns.
495        """
496    if len(nodes) != 1:
497      return False
498    return self.match(nodes[0], results)
499
500  def generate_matches(self, nodes):
501    """
502        Generator yielding all matches for this pattern.
503
504        Default implementation for non-wildcard patterns.
505        """
506    r = {}
507    if nodes and self.match(nodes[0], r):
508      yield 1, r
509
510
511class LeafPattern(BasePattern):
512
513  def __init__(self, type=None, content=None, name=None):
514    """
515        Initializer.  Takes optional type, content, and name.
516
517        The type, if given must be a token type (< 256).  If not given,
518        this matches any *leaf* node; the content may still be required.
519
520        The content, if given, must be a string.
521
522        If a name is given, the matching node is stored in the results
523        dict under that key.
524        """
525    if type is not None:
526      assert 0 <= type < 256, type
527    if content is not None:
528      assert isinstance(content, str), repr(content)
529    self.type = type
530    self.content = content
531    self.name = name
532
533  def match(self, node, results=None):
534    """Override match() to insist on a leaf node."""
535    if not isinstance(node, Leaf):
536      return False
537    return BasePattern.match(self, node, results)
538
539  def _submatch(self, node, results=None):
540    """
541        Match the pattern's content to the node's children.
542
543        This assumes the node type matches and self.content is not None.
544
545        Returns True if it matches, False if not.
546
547        If results is not None, it must be a dict which will be
548        updated with the nodes matching named subpatterns.
549
550        When returning False, the results dict may still be updated.
551        """
552    return self.content == node.value
553
554
555class NodePattern(BasePattern):
556
557  wildcards = False
558
559  def __init__(self, type=None, content=None, name=None):
560    """
561        Initializer.  Takes optional type, content, and name.
562
563        The type, if given, must be a symbol type (>= 256).  If the
564        type is None this matches *any* single node (leaf or not),
565        except if content is not None, in which it only matches
566        non-leaf nodes that also match the content pattern.
567
568        The content, if not None, must be a sequence of Patterns that
569        must match the node's children exactly.  If the content is
570        given, the type must not be None.
571
572        If a name is given, the matching node is stored in the results
573        dict under that key.
574        """
575    if type is not None:
576      assert type >= 256, type
577    if content is not None:
578      assert not isinstance(content, str), repr(content)
579      content = list(content)
580      for i, item in enumerate(content):
581        assert isinstance(item, BasePattern), (i, item)
582        if isinstance(item, WildcardPattern):
583          self.wildcards = True
584    self.type = type
585    self.content = content
586    self.name = name
587
588  def _submatch(self, node, results=None):
589    """
590        Match the pattern's content to the node's children.
591
592        This assumes the node type matches and self.content is not None.
593
594        Returns True if it matches, False if not.
595
596        If results is not None, it must be a dict which will be
597        updated with the nodes matching named subpatterns.
598
599        When returning False, the results dict may still be updated.
600        """
601    if self.wildcards:
602      for c, r in generate_matches(self.content, node.children):
603        if c == len(node.children):
604          if results is not None:
605            results.update(r)
606          return True
607      return False
608    if len(self.content) != len(node.children):
609      return False
610    for subpattern, child in zip(self.content, node.children):
611      if not subpattern.match(child, results):
612        return False
613    return True
614
615
616class WildcardPattern(BasePattern):
617  """
618    A wildcard pattern can match zero or more nodes.
619
620    This has all the flexibility needed to implement patterns like:
621
622    .*      .+      .?      .{m,n}
623    (a b c | d e | f)
624    (...)*  (...)+  (...)?  (...){m,n}
625
626    except it always uses non-greedy matching.
627    """
628
629  def __init__(self, content=None, min=0, max=HUGE, name=None):
630    """
631        Initializer.
632
633        Args:
634            content: optional sequence of subsequences of patterns;
635                     if absent, matches one node;
636                     if present, each subsequence is an alternative [*]
637            min: optional minimum number of times to match, default 0
638            max: optional maximum number of times to match, default HUGE
639            name: optional name assigned to this match
640
641        [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
642            equivalent to (a b c | d e | f g h); if content is None,
643            this is equivalent to '.' in regular expression terms.
644            The min and max parameters work as follows:
645                min=0, max=maxint: .*
646                min=1, max=maxint: .+
647                min=0, max=1: .?
648                min=1, max=1: .
649            If content is not None, replace the dot with the parenthesized
650            list of alternatives, e.g. (a b c | d e | f g h)*
651        """
652    assert 0 <= min <= max <= HUGE, (min, max)
653    if content is not None:
654      content = tuple(map(tuple, content))  # Protect against alterations
655      # Check sanity of alternatives
656      assert len(content), repr(content)  # Can't have zero alternatives
657      for alt in content:
658        assert len(alt), repr(alt)  # Can have empty alternatives
659    self.content = content
660    self.min = min
661    self.max = max
662    self.name = name
663
664  def optimize(self):
665    """Optimize certain stacked wildcard patterns."""
666    subpattern = None
667    if (self.content is not None and len(self.content) == 1 and
668        len(self.content[0]) == 1):
669      subpattern = self.content[0][0]
670    if self.min == 1 and self.max == 1:
671      if self.content is None:
672        return NodePattern(name=self.name)
673      if subpattern is not None and self.name == subpattern.name:
674        return subpattern.optimize()
675    if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
676        subpattern.min <= 1 and self.name == subpattern.name):
677      return WildcardPattern(subpattern.content, self.min * subpattern.min,
678                             self.max * subpattern.max, subpattern.name)
679    return self
680
681  def match(self, node, results=None):
682    """Does this pattern exactly match a node?"""
683    return self.match_seq([node], results)
684
685  def match_seq(self, nodes, results=None):
686    """Does this pattern exactly match a sequence of nodes?"""
687    for c, r in self.generate_matches(nodes):
688      if c == len(nodes):
689        if results is not None:
690          results.update(r)
691          if self.name:
692            results[self.name] = list(nodes)
693        return True
694    return False
695
696  def generate_matches(self, nodes):
697    """
698        Generator yielding matches for a sequence of nodes.
699
700        Args:
701            nodes: sequence of nodes
702
703        Yields:
704            (count, results) tuples where:
705            count: the match comprises nodes[:count];
706            results: dict containing named submatches.
707        """
708    if self.content is None:
709      # Shortcut for special case (see __init__.__doc__)
710      for count in range(self.min, 1 + min(len(nodes), self.max)):
711        r = {}
712        if self.name:
713          r[self.name] = nodes[:count]
714        yield count, r
715    elif self.name == 'bare_name':
716      yield self._bare_name_matches(nodes)
717    else:
718      # The reason for this is that hitting the recursion limit usually
719      # results in some ugly messages about how RuntimeErrors are being
720      # ignored. We only have to do this on CPython, though, because other
721      # implementations don't have this nasty bug in the first place.
722      if hasattr(sys, 'getrefcount'):
723        save_stderr = sys.stderr
724        sys.stderr = StringIO()
725      try:
726        for count, r in self._recursive_matches(nodes, 0):
727          if self.name:
728            r[self.name] = nodes[:count]
729          yield count, r
730      except RuntimeError:
731        # We fall back to the iterative pattern matching scheme if the recursive
732        # scheme hits the recursion limit.
733        for count, r in self._iterative_matches(nodes):
734          if self.name:
735            r[self.name] = nodes[:count]
736          yield count, r
737      finally:
738        if hasattr(sys, 'getrefcount'):
739          sys.stderr = save_stderr
740
741  def _iterative_matches(self, nodes):
742    """Helper to iteratively yield the matches."""
743    nodelen = len(nodes)
744    if 0 >= self.min:
745      yield 0, {}
746
747    results = []
748    # generate matches that use just one alt from self.content
749    for alt in self.content:
750      for c, r in generate_matches(alt, nodes):
751        yield c, r
752        results.append((c, r))
753
754    # for each match, iterate down the nodes
755    while results:
756      new_results = []
757      for c0, r0 in results:
758        # stop if the entire set of nodes has been matched
759        if c0 < nodelen and c0 <= self.max:
760          for alt in self.content:
761            for c1, r1 in generate_matches(alt, nodes[c0:]):
762              if c1 > 0:
763                r = {}
764                r.update(r0)
765                r.update(r1)
766                yield c0 + c1, r
767                new_results.append((c0 + c1, r))
768      results = new_results
769
770  def _bare_name_matches(self, nodes):
771    """Special optimized matcher for bare_name."""
772    count = 0
773    r = {}
774    done = False
775    max = len(nodes)
776    while not done and count < max:
777      done = True
778      for leaf in self.content:
779        if leaf[0].match(nodes[count], r):
780          count += 1
781          done = False
782          break
783    r[self.name] = nodes[:count]
784    return count, r
785
786  def _recursive_matches(self, nodes, count):
787    """Helper to recursively yield the matches."""
788    assert self.content is not None
789    if count >= self.min:
790      yield 0, {}
791    if count < self.max:
792      for alt in self.content:
793        for c0, r0 in generate_matches(alt, nodes):
794          for c1, r1 in self._recursive_matches(nodes[c0:], count + 1):
795            r = {}
796            r.update(r0)
797            r.update(r1)
798            yield c0 + c1, r
799
800
801class NegatedPattern(BasePattern):
802
803  def __init__(self, content=None):
804    """
805        Initializer.
806
807        The argument is either a pattern or None.  If it is None, this
808        only matches an empty sequence (effectively '$' in regex
809        lingo).  If it is not None, this matches whenever the argument
810        pattern doesn't have any matches.
811        """
812    if content is not None:
813      assert isinstance(content, BasePattern), repr(content)
814    self.content = content
815
816  def match(self, node):
817    # We never match a node in its entirety
818    return False
819
820  def match_seq(self, nodes):
821    # We only match an empty sequence of nodes in its entirety
822    return len(nodes) == 0
823
824  def generate_matches(self, nodes):
825    if self.content is None:
826      # Return a match if there is an empty sequence
827      if len(nodes) == 0:
828        yield 0, {}
829    else:
830      # Return a match if the argument pattern has no matches
831      for c, r in self.content.generate_matches(nodes):
832        return
833      yield 0, {}
834
835
836def generate_matches(patterns, nodes):
837  """
838    Generator yielding matches for a sequence of patterns and nodes.
839
840    Args:
841        patterns: a sequence of patterns
842        nodes: a sequence of nodes
843
844    Yields:
845        (count, results) tuples where:
846        count: the entire sequence of patterns matches nodes[:count];
847        results: dict containing named submatches.
848        """
849  if not patterns:
850    yield 0, {}
851  else:
852    p, rest = patterns[0], patterns[1:]
853    for c0, r0 in p.generate_matches(nodes):
854      if not rest:
855        yield c0, r0
856      else:
857        for c1, r1 in generate_matches(rest, nodes[c0:]):
858          r = {}
859          r.update(r0)
860          r.update(r1)
861          yield c0 + c1, r
862