• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""pytree-related utilities.
15
16This module collects various utilities related to the parse trees produced by
17the lib2to3 library.
18
19  NodeName(): produces a string name for pytree nodes.
20  ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse
21                     a given string with code to a pytree.
22  InsertNodeBefore(): insert a node before another in a pytree.
23  InsertNodeAfter(): insert a node after another in a pytree.
24  {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes.
25"""
26
27import ast
28import os
29
30from yapf_third_party._ylib2to3 import pygram
31from yapf_third_party._ylib2to3 import pytree
32from yapf_third_party._ylib2to3.pgen2 import driver
33from yapf_third_party._ylib2to3.pgen2 import parse
34from yapf_third_party._ylib2to3.pgen2 import token
35
36# TODO(eliben): We may want to get rid of this filtering at some point once we
37# have a better understanding of what information we need from the tree. Then,
38# these tokens may be filtered out from the tree before the tree gets to the
39# unwrapper.
40NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER'])
41
42
43class Annotation(object):
44  """Annotation names associated with pytrees."""
45  CHILD_INDENT = 'child_indent'
46  NEWLINES = 'newlines'
47  MUST_SPLIT = 'must_split'
48  SPLIT_PENALTY = 'split_penalty'
49  SUBTYPE = 'subtype'
50
51
52def NodeName(node):
53  """Produce a string name for a given node.
54
55  For a Leaf this is the token name, and for a Node this is the type.
56
57  Arguments:
58    node: a tree node
59
60  Returns:
61    Name as a string.
62  """
63  # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols.
64  if node.type < 256:
65    return token.tok_name[node.type]
66  else:
67    return pygram.python_grammar.number2symbol[node.type]
68
69
70def FirstLeafNode(node):
71  if isinstance(node, pytree.Leaf):
72    return node
73  return FirstLeafNode(node.children[0])
74
75
76def LastLeafNode(node):
77  if isinstance(node, pytree.Leaf):
78    return node
79  return LastLeafNode(node.children[-1])
80
81
82# lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for
83# parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a
84# context where a keyword is disallowed).
85# It forgets to do the same for 'exec' though. Luckily, Python is amenable to
86# monkey-patching.
87# Note that pygram.python_grammar_no_print_and_exec_statement with "_and_exec"
88# will require Python >=3.8.
89_PYTHON_GRAMMAR = pygram.python_grammar_no_print_statement.copy()
90del _PYTHON_GRAMMAR.keywords['exec']
91
92
93def ParseCodeToTree(code):
94  """Parse the given code to a lib2to3 pytree.
95
96  Arguments:
97    code: a string with the code to parse.
98
99  Raises:
100    SyntaxError if the code is invalid syntax.
101    parse.ParseError if some other parsing failure.
102
103  Returns:
104    The root node of the parsed tree.
105  """
106  # This function is tiny, but the incantation for invoking the parser correctly
107  # is sufficiently magical to be worth abstracting away.
108  if not code.endswith(os.linesep):
109    code += os.linesep
110
111  try:
112    parser_driver = driver.Driver(_PYTHON_GRAMMAR, convert=pytree.convert)
113    tree = parser_driver.parse_string(code, debug=False)
114  except parse.ParseError:
115    # Raise a syntax error if the code is invalid python syntax.
116    ast.parse(code)
117    raise
118  return _WrapEndMarker(tree)
119
120
121def _WrapEndMarker(tree):
122  """Wrap a single ENDMARKER token in a "file_input" node.
123
124  Arguments:
125    tree: (pytree.Node) The root node of the parsed tree.
126
127  Returns:
128    The root node of the parsed tree. If the tree is a single ENDMARKER node,
129    then that node is wrapped in a "file_input" node. That will ensure we don't
130    skip comments attached to that node.
131  """
132  if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER:
133    return pytree.Node(pygram.python_symbols.file_input, [tree])
134  return tree
135
136
137def InsertNodesBefore(new_nodes, target):
138  """Insert new_nodes before the given target location in the tree.
139
140  Arguments:
141    new_nodes: a sequence of new nodes to insert (the nodes should not be in the
142      tree).
143    target: the target node before which the new node node will be inserted.
144
145  Raises:
146    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
147  """
148  for node in new_nodes:
149    _InsertNodeAt(node, target, after=False)
150
151
152def InsertNodesAfter(new_nodes, target):
153  """Insert new_nodes after the given target location in the tree.
154
155  Arguments:
156    new_nodes: a sequence of new nodes to insert (the nodes should not be in the
157      tree).
158    target: the target node after which the new node node will be inserted.
159
160  Raises:
161    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
162  """
163  for node in reversed(new_nodes):
164    _InsertNodeAt(node, target, after=True)
165
166
167def _InsertNodeAt(new_node, target, after=False):
168  """Underlying implementation for node insertion.
169
170  Arguments:
171    new_node: a new node to insert (this node should not be in the tree).
172    target: the target node.
173    after: if True, new_node is inserted after target. Otherwise, it's inserted
174      before target.
175
176  Returns:
177    nothing
178
179  Raises:
180    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
181  """
182
183  # Protect against attempts to insert nodes which already belong to some tree.
184  if new_node.parent is not None:
185    raise RuntimeError('inserting node which already has a parent',
186                       (new_node, new_node.parent))
187
188  # The code here is based on pytree.Base.next_sibling
189  parent_of_target = target.parent
190  if parent_of_target is None:
191    raise RuntimeError('expected target node to have a parent', (target,))
192
193  for i, child in enumerate(parent_of_target.children):
194    if child is target:
195      insertion_index = i + 1 if after else i
196      parent_of_target.insert_child(insertion_index, new_node)
197      return
198
199  raise RuntimeError('unable to find insertion point for target node',
200                     (target,))
201
202
203# The following constant and functions implement a simple custom annotation
204# mechanism for pytree nodes. We attach new attributes to nodes. Each attribute
205# is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be
206# managed through GetNodeAnnotation and SetNodeAnnotation.
207_NODE_ANNOTATION_PREFIX = '_yapf_annotation_'
208
209
210def CopyYapfAnnotations(src, dst):
211  """Copy all YAPF annotations from the source node to the destination node.
212
213  Arguments:
214    src: the source node.
215    dst: the destination node.
216  """
217  for annotation in dir(src):
218    if annotation.startswith(_NODE_ANNOTATION_PREFIX):
219      setattr(dst, annotation, getattr(src, annotation, None))
220
221
222def GetNodeAnnotation(node, annotation, default=None):
223  """Get annotation value from a node.
224
225  Arguments:
226    node: the node.
227    annotation: annotation name - a string.
228    default: the default value to return if there's no annotation.
229
230  Returns:
231    Value of the annotation in the given node. If the node doesn't have this
232    particular annotation name yet, returns default.
233  """
234  return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default)
235
236
237def SetNodeAnnotation(node, annotation, value):
238  """Set annotation value on a node.
239
240  Arguments:
241    node: the node.
242    annotation: annotation name - a string.
243    value: annotation value to set.
244  """
245  setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value)
246
247
248def AppendNodeAnnotation(node, annotation, value):
249  """Appends an annotation value to a list of annotations on the node.
250
251  Arguments:
252    node: the node.
253    annotation: annotation name - a string.
254    value: annotation value to set.
255  """
256  attr = GetNodeAnnotation(node, annotation, set())
257  attr.add(value)
258  SetNodeAnnotation(node, annotation, attr)
259
260
261def RemoveSubtypeAnnotation(node, value):
262  """Removes an annotation value from the subtype annotations on the node.
263
264  Arguments:
265    node: the node.
266    value: annotation value to remove.
267  """
268  attr = GetNodeAnnotation(node, Annotation.SUBTYPE)
269  if attr and value in attr:
270    attr.remove(value)
271    SetNodeAnnotation(node, Annotation.SUBTYPE, attr)
272
273
274def GetOpeningBracket(node):
275  """Get opening bracket value from a node.
276
277  Arguments:
278    node: the node.
279
280  Returns:
281    The opening bracket node or None if it couldn't find one.
282  """
283  return getattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', None)
284
285
286def SetOpeningBracket(node, bracket):
287  """Set opening bracket value for a node.
288
289  Arguments:
290    node: the node.
291    bracket: opening bracket to set.
292  """
293  setattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', bracket)
294
295
296def DumpNodeToString(node):
297  """Dump a string representation of the given node. For debugging.
298
299  Arguments:
300    node: the node.
301
302  Returns:
303    The string representation.
304  """
305  if isinstance(node, pytree.Leaf):
306    fmt = ('{name}({value}) [lineno={lineno}, column={column}, '
307           'prefix={prefix}, penalty={penalty}]')
308    return fmt.format(
309        name=NodeName(node),
310        value=_PytreeNodeRepr(node),
311        lineno=node.lineno,
312        column=node.column,
313        prefix=repr(node.prefix),
314        penalty=GetNodeAnnotation(node, Annotation.SPLIT_PENALTY, None))
315  else:
316    fmt = '{node} [{len} children] [child_indent="{indent}"]'
317    return fmt.format(
318        node=NodeName(node),
319        len=len(node.children),
320        indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT))
321
322
323def _PytreeNodeRepr(node):
324  """Like pytree.Node.__repr__, but names instead of numbers for tokens."""
325  if isinstance(node, pytree.Node):
326    return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node),
327                           [_PytreeNodeRepr(c) for c in node.children])
328  if isinstance(node, pytree.Leaf):
329    return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value)
330
331
332def IsCommentStatement(node):
333  return (NodeName(node) == 'simple_stmt' and
334          node.children[0].type == token.COMMENT)
335