• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015-2017 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""pytree-related utilities.
15
16This module collects various utilities related to the parse trees produced by
17the lib2to3 library.
18
19  NodeName(): produces a string name for pytree nodes.
20  ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse
21                     a given string with code to a pytree.
22  InsertNodeBefore(): insert a node before another in a pytree.
23  InsertNodeAfter(): insert a node after another in a pytree.
24  {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes.
25"""
26
27import ast
28from lib2to3 import pygram
29from lib2to3 import pytree
30from lib2to3.pgen2 import driver
31from lib2to3.pgen2 import parse
32from lib2to3.pgen2 import token
33
34# TODO(eliben): We may want to get rid of this filtering at some point once we
35# have a better understanding of what information we need from the tree. Then,
36# these tokens may be filtered out from the tree before the tree gets to the
37# unwrapper.
38NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER'])
39
40OPENING_BRACKETS = frozenset({'(', '[', '{'})
41CLOSING_BRACKETS = frozenset({')', ']', '}'})
42
43
44class Annotation(object):
45  """Annotation names associated with pytrees."""
46  CHILD_INDENT = 'child_indent'
47  NEWLINES = 'newlines'
48  MUST_SPLIT = 'must_split'
49  SPLIT_PENALTY = 'split_penalty'
50  SUBTYPE = 'subtype'
51
52
53def NodeName(node):
54  """Produce a string name for a given node.
55
56  For a Leaf this is the token name, and for a Node this is the type.
57
58  Arguments:
59    node: a tree node
60
61  Returns:
62    Name as a string.
63  """
64  # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols.
65  if node.type < 256:
66    return token.tok_name[node.type]
67  else:
68    return pygram.python_grammar.number2symbol[node.type]
69
70
71# lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for
72# parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a
73# context where a keyword is disallowed).
74# It forgets to do the same for 'exec' though. Luckily, Python is amenable to
75# monkey-patching.
76_GRAMMAR_FOR_PY3 = pygram.python_grammar_no_print_statement.copy()
77del _GRAMMAR_FOR_PY3.keywords['exec']
78
79_GRAMMAR_FOR_PY2 = pygram.python_grammar.copy()
80del _GRAMMAR_FOR_PY2.keywords['nonlocal']
81
82
83def ParseCodeToTree(code):
84  """Parse the given code to a lib2to3 pytree.
85
86  Arguments:
87    code: a string with the code to parse.
88
89  Raises:
90    SyntaxError if the code is invalid syntax.
91    parse.ParseError if some other parsing failure.
92
93  Returns:
94    The root node of the parsed tree.
95  """
96  # This function is tiny, but the incantation for invoking the parser correctly
97  # is sufficiently magical to be worth abstracting away.
98  try:
99    # Try to parse using a Python 3 grammar, which is more permissive (print and
100    # exec are not keywords).
101    parser_driver = driver.Driver(_GRAMMAR_FOR_PY3, convert=pytree.convert)
102    tree = parser_driver.parse_string(code, debug=False)
103  except parse.ParseError:
104    # Now try to parse using a Python 2 grammar; If this fails, then
105    # there's something else wrong with the code.
106    try:
107      parser_driver = driver.Driver(_GRAMMAR_FOR_PY2, convert=pytree.convert)
108      tree = parser_driver.parse_string(code, debug=False)
109    except parse.ParseError:
110      # Raise a syntax error if the code is invalid python syntax.
111      try:
112        ast.parse(code)
113      except SyntaxError as e:
114        raise e
115      else:
116        raise
117  return _WrapEndMarker(tree)
118
119
120def _WrapEndMarker(tree):
121  """Wrap a single ENDMARKER token in a "file_input" node.
122
123  Arguments:
124    tree: (pytree.Node) The root node of the parsed tree.
125
126  Returns:
127    The root node of the parsed tree. If the tree is a single ENDMARKER node,
128    then that node is wrapped in a "file_input" node. That will ensure we don't
129    skip comments attached to that node.
130  """
131  if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER:
132    return pytree.Node(pygram.python_symbols.file_input, [tree])
133  return tree
134
135
136def InsertNodesBefore(new_nodes, target):
137  """Insert new_nodes before the given target location in the tree.
138
139  Arguments:
140    new_nodes: a sequence of new nodes to insert (the nodes should not be in the
141      tree).
142    target: the target node before which the new node node will be inserted.
143
144  Raises:
145    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
146  """
147  for node in new_nodes:
148    _InsertNodeAt(node, target, after=False)
149
150
151def InsertNodesAfter(new_nodes, target):
152  """Insert new_nodes after the given target location in the tree.
153
154  Arguments:
155    new_nodes: a sequence of new nodes to insert (the nodes should not be in the
156      tree).
157    target: the target node after which the new node node will be inserted.
158
159  Raises:
160    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
161  """
162  for node in reversed(new_nodes):
163    _InsertNodeAt(node, target, after=True)
164
165
166def _InsertNodeAt(new_node, target, after=False):
167  """Underlying implementation for node insertion.
168
169  Arguments:
170    new_node: a new node to insert (this node should not be in the tree).
171    target: the target node.
172    after: if True, new_node is inserted after target. Otherwise, it's inserted
173      before target.
174
175  Returns:
176    nothing
177
178  Raises:
179    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
180  """
181
182  # Protect against attempts to insert nodes which already belong to some tree.
183  if new_node.parent is not None:
184    raise RuntimeError('inserting node which already has a parent',
185                       (new_node, new_node.parent))
186
187  # The code here is based on pytree.Base.next_sibling
188  parent_of_target = target.parent
189  if parent_of_target is None:
190    raise RuntimeError('expected target node to have a parent', (target,))
191
192  for i, child in enumerate(parent_of_target.children):
193    if child is target:
194      insertion_index = i + 1 if after else i
195      parent_of_target.insert_child(insertion_index, new_node)
196      return
197
198  raise RuntimeError('unable to find insertion point for target node',
199                     (target,))
200
201
202# The following constant and functions implement a simple custom annotation
203# mechanism for pytree nodes. We attach new attributes to nodes. Each attribute
204# is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be
205# managed through GetNodeAnnotation and SetNodeAnnotation.
206_NODE_ANNOTATION_PREFIX = '_yapf_annotation_'
207
208
209def GetNodeAnnotation(node, annotation, default=None):
210  """Get annotation value from a node.
211
212  Arguments:
213    node: the node.
214    annotation: annotation name - a string.
215    default: the default value to return if there's no annotation.
216
217  Returns:
218    Value of the annotation in the given node. If the node doesn't have this
219    particular annotation name yet, returns default.
220  """
221  return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default)
222
223
224def SetNodeAnnotation(node, annotation, value):
225  """Set annotation value on a node.
226
227  Arguments:
228    node: the node.
229    annotation: annotation name - a string.
230    value: annotation value to set.
231  """
232  setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value)
233
234
235def AppendNodeAnnotation(node, annotation, value):
236  """Appends an annotation value to a list of annotations on the node.
237
238  Arguments:
239    node: the node.
240    annotation: annotation name - a string.
241    value: annotation value to set.
242  """
243  attr = GetNodeAnnotation(node, annotation, set())
244  attr.add(value)
245  SetNodeAnnotation(node, annotation, attr)
246
247
248def RemoveSubtypeAnnotation(node, value):
249  """Removes an annotation value from the subtype annotations on the node.
250
251  Arguments:
252    node: the node.
253    value: annotation value to remove.
254  """
255  attr = GetNodeAnnotation(node, Annotation.SUBTYPE)
256  if attr and value in attr:
257    attr.remove(value)
258    SetNodeAnnotation(node, Annotation.SUBTYPE, attr)
259
260
261def DumpNodeToString(node):
262  """Dump a string representation of the given node. For debugging.
263
264  Arguments:
265    node: the node.
266
267  Returns:
268    The string representation.
269  """
270  if isinstance(node, pytree.Leaf):
271    fmt = '{name}({value}) [lineno={lineno}, column={column}, prefix={prefix}]'
272    return fmt.format(
273        name=NodeName(node),
274        value=_PytreeNodeRepr(node),
275        lineno=node.lineno,
276        column=node.column,
277        prefix=repr(node.prefix))
278  else:
279    fmt = '{node} [{len} children] [child_indent="{indent}"]'
280    return fmt.format(
281        node=NodeName(node),
282        len=len(node.children),
283        indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT))
284
285
286def _PytreeNodeRepr(node):
287  """Like pytree.Node.__repr__, but names instead of numbers for tokens."""
288  if isinstance(node, pytree.Node):
289    return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node),
290                           [_PytreeNodeRepr(c) for c in node.children])
291  if isinstance(node, pytree.Leaf):
292    return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value)
293
294
295def IsCommentStatement(node):
296  return (NodeName(node) == 'simple_stmt' and
297          node.children[0].type == token.COMMENT)
298