• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""pytree-related utilities.
15
16This module collects various utilities related to the parse trees produced by
17the lib2to3 library.
18
19  NodeName(): produces a string name for pytree nodes.
20  ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse
21                     a given string with code to a pytree.
22  InsertNodeBefore(): insert a node before another in a pytree.
23  InsertNodeAfter(): insert a node after another in a pytree.
24  {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes.
25"""
26
27import ast
28import os
29
30from lib2to3 import pygram
31from lib2to3 import pytree
32from lib2to3.pgen2 import driver
33from lib2to3.pgen2 import parse
34from lib2to3.pgen2 import token
35
36# TODO(eliben): We may want to get rid of this filtering at some point once we
37# have a better understanding of what information we need from the tree. Then,
38# these tokens may be filtered out from the tree before the tree gets to the
39# unwrapper.
40NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER'])
41
42OPENING_BRACKETS = frozenset({'(', '[', '{'})
43CLOSING_BRACKETS = frozenset({')', ']', '}'})
44
45
46class Annotation(object):
47  """Annotation names associated with pytrees."""
48  CHILD_INDENT = 'child_indent'
49  NEWLINES = 'newlines'
50  MUST_SPLIT = 'must_split'
51  SPLIT_PENALTY = 'split_penalty'
52  SUBTYPE = 'subtype'
53
54
55def NodeName(node):
56  """Produce a string name for a given node.
57
58  For a Leaf this is the token name, and for a Node this is the type.
59
60  Arguments:
61    node: a tree node
62
63  Returns:
64    Name as a string.
65  """
66  # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols.
67  if node.type < 256:
68    return token.tok_name[node.type]
69  else:
70    return pygram.python_grammar.number2symbol[node.type]
71
72
73def FirstLeafNode(node):
74  if isinstance(node, pytree.Leaf):
75    return node
76  return FirstLeafNode(node.children[0])
77
78
79def LastLeafNode(node):
80  if isinstance(node, pytree.Leaf):
81    return node
82  return LastLeafNode(node.children[-1])
83
84
85# lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for
86# parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a
87# context where a keyword is disallowed).
88# It forgets to do the same for 'exec' though. Luckily, Python is amenable to
89# monkey-patching.
90_GRAMMAR_FOR_PY3 = pygram.python_grammar_no_print_statement.copy()
91del _GRAMMAR_FOR_PY3.keywords['exec']
92
93_GRAMMAR_FOR_PY2 = pygram.python_grammar.copy()
94del _GRAMMAR_FOR_PY2.keywords['nonlocal']
95
96
97def ParseCodeToTree(code):
98  """Parse the given code to a lib2to3 pytree.
99
100  Arguments:
101    code: a string with the code to parse.
102
103  Raises:
104    SyntaxError if the code is invalid syntax.
105    parse.ParseError if some other parsing failure.
106
107  Returns:
108    The root node of the parsed tree.
109  """
110  # This function is tiny, but the incantation for invoking the parser correctly
111  # is sufficiently magical to be worth abstracting away.
112  if not code.endswith(os.linesep):
113    code += os.linesep
114
115  try:
116    # Try to parse using a Python 3 grammar, which is more permissive (print and
117    # exec are not keywords).
118    parser_driver = driver.Driver(_GRAMMAR_FOR_PY3, convert=pytree.convert)
119    tree = parser_driver.parse_string(code, debug=False)
120  except parse.ParseError:
121    # Now try to parse using a Python 2 grammar; If this fails, then
122    # there's something else wrong with the code.
123    try:
124      parser_driver = driver.Driver(_GRAMMAR_FOR_PY2, convert=pytree.convert)
125      tree = parser_driver.parse_string(code, debug=False)
126    except parse.ParseError:
127      # Raise a syntax error if the code is invalid python syntax.
128      try:
129        ast.parse(code)
130      except SyntaxError as e:
131        raise e
132      else:
133        raise
134  return _WrapEndMarker(tree)
135
136
137def _WrapEndMarker(tree):
138  """Wrap a single ENDMARKER token in a "file_input" node.
139
140  Arguments:
141    tree: (pytree.Node) The root node of the parsed tree.
142
143  Returns:
144    The root node of the parsed tree. If the tree is a single ENDMARKER node,
145    then that node is wrapped in a "file_input" node. That will ensure we don't
146    skip comments attached to that node.
147  """
148  if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER:
149    return pytree.Node(pygram.python_symbols.file_input, [tree])
150  return tree
151
152
153def InsertNodesBefore(new_nodes, target):
154  """Insert new_nodes before the given target location in the tree.
155
156  Arguments:
157    new_nodes: a sequence of new nodes to insert (the nodes should not be in the
158      tree).
159    target: the target node before which the new node node will be inserted.
160
161  Raises:
162    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
163  """
164  for node in new_nodes:
165    _InsertNodeAt(node, target, after=False)
166
167
168def InsertNodesAfter(new_nodes, target):
169  """Insert new_nodes after the given target location in the tree.
170
171  Arguments:
172    new_nodes: a sequence of new nodes to insert (the nodes should not be in the
173      tree).
174    target: the target node after which the new node node will be inserted.
175
176  Raises:
177    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
178  """
179  for node in reversed(new_nodes):
180    _InsertNodeAt(node, target, after=True)
181
182
183def _InsertNodeAt(new_node, target, after=False):
184  """Underlying implementation for node insertion.
185
186  Arguments:
187    new_node: a new node to insert (this node should not be in the tree).
188    target: the target node.
189    after: if True, new_node is inserted after target. Otherwise, it's inserted
190      before target.
191
192  Returns:
193    nothing
194
195  Raises:
196    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
197  """
198
199  # Protect against attempts to insert nodes which already belong to some tree.
200  if new_node.parent is not None:
201    raise RuntimeError('inserting node which already has a parent',
202                       (new_node, new_node.parent))
203
204  # The code here is based on pytree.Base.next_sibling
205  parent_of_target = target.parent
206  if parent_of_target is None:
207    raise RuntimeError('expected target node to have a parent', (target,))
208
209  for i, child in enumerate(parent_of_target.children):
210    if child is target:
211      insertion_index = i + 1 if after else i
212      parent_of_target.insert_child(insertion_index, new_node)
213      return
214
215  raise RuntimeError('unable to find insertion point for target node',
216                     (target,))
217
218
219# The following constant and functions implement a simple custom annotation
220# mechanism for pytree nodes. We attach new attributes to nodes. Each attribute
221# is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be
222# managed through GetNodeAnnotation and SetNodeAnnotation.
223_NODE_ANNOTATION_PREFIX = '_yapf_annotation_'
224
225
226def CopyYapfAnnotations(src, dst):
227  """Copy all YAPF annotations from the source node to the destination node.
228
229  Arguments:
230    src: the source node.
231    dst: the destination node.
232  """
233  for annotation in dir(src):
234    if annotation.startswith(_NODE_ANNOTATION_PREFIX):
235      setattr(dst, annotation, getattr(src, annotation, None))
236
237
238def GetNodeAnnotation(node, annotation, default=None):
239  """Get annotation value from a node.
240
241  Arguments:
242    node: the node.
243    annotation: annotation name - a string.
244    default: the default value to return if there's no annotation.
245
246  Returns:
247    Value of the annotation in the given node. If the node doesn't have this
248    particular annotation name yet, returns default.
249  """
250  return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default)
251
252
253def SetNodeAnnotation(node, annotation, value):
254  """Set annotation value on a node.
255
256  Arguments:
257    node: the node.
258    annotation: annotation name - a string.
259    value: annotation value to set.
260  """
261  setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value)
262
263
264def AppendNodeAnnotation(node, annotation, value):
265  """Appends an annotation value to a list of annotations on the node.
266
267  Arguments:
268    node: the node.
269    annotation: annotation name - a string.
270    value: annotation value to set.
271  """
272  attr = GetNodeAnnotation(node, annotation, set())
273  attr.add(value)
274  SetNodeAnnotation(node, annotation, attr)
275
276
277def RemoveSubtypeAnnotation(node, value):
278  """Removes an annotation value from the subtype annotations on the node.
279
280  Arguments:
281    node: the node.
282    value: annotation value to remove.
283  """
284  attr = GetNodeAnnotation(node, Annotation.SUBTYPE)
285  if attr and value in attr:
286    attr.remove(value)
287    SetNodeAnnotation(node, Annotation.SUBTYPE, attr)
288
289
290def GetOpeningBracket(node):
291  """Get opening bracket value from a node.
292
293  Arguments:
294    node: the node.
295
296  Returns:
297    The opening bracket node or None if it couldn't find one.
298  """
299  return getattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', None)
300
301
302def SetOpeningBracket(node, bracket):
303  """Set opening bracket value for a node.
304
305  Arguments:
306    node: the node.
307    bracket: opening bracket to set.
308  """
309  setattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', bracket)
310
311
312def DumpNodeToString(node):
313  """Dump a string representation of the given node. For debugging.
314
315  Arguments:
316    node: the node.
317
318  Returns:
319    The string representation.
320  """
321  if isinstance(node, pytree.Leaf):
322    fmt = ('{name}({value}) [lineno={lineno}, column={column}, '
323           'prefix={prefix}, penalty={penalty}]')
324    return fmt.format(
325        name=NodeName(node),
326        value=_PytreeNodeRepr(node),
327        lineno=node.lineno,
328        column=node.column,
329        prefix=repr(node.prefix),
330        penalty=GetNodeAnnotation(node, Annotation.SPLIT_PENALTY, None))
331  else:
332    fmt = '{node} [{len} children] [child_indent="{indent}"]'
333    return fmt.format(
334        node=NodeName(node),
335        len=len(node.children),
336        indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT))
337
338
339def _PytreeNodeRepr(node):
340  """Like pytree.Node.__repr__, but names instead of numbers for tokens."""
341  if isinstance(node, pytree.Node):
342    return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node),
343                           [_PytreeNodeRepr(c) for c in node.children])
344  if isinstance(node, pytree.Leaf):
345    return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value)
346
347
348def IsCommentStatement(node):
349  return (NodeName(node) == 'simple_stmt' and
350          node.children[0].type == token.COMMENT)
351