• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""PyTreeUnwrapper - produces a list of logical lines from a pytree.
15
16[for a description of what a logical line is, see logical_line.py]
17
18This is a pytree visitor that goes over a parse tree and produces a list of
19LogicalLine containers from it, each with its own depth and containing all the
20tokens that could fit on the line if there were no maximal line-length
21limitations.
22
23Note: a precondition to running this visitor and obtaining correct results is
24for the tree to have its comments spliced in as nodes. Prefixes are ignored.
25
26For most uses, the convenience function UnwrapPyTree should be sufficient.
27"""
28
29# The word "token" is overloaded within this module, so for clarity rename
30# the imported pgen2.token module.
31from lib2to3 import pytree
32from lib2to3.pgen2 import token as grammar_token
33
34from yapf.yapflib import format_token
35from yapf.yapflib import logical_line
36from yapf.yapflib import object_state
37from yapf.yapflib import pytree_utils
38from yapf.yapflib import pytree_visitor
39from yapf.yapflib import split_penalty
40from yapf.yapflib import style
41from yapf.yapflib import subtypes
42
43
44def UnwrapPyTree(tree):
45  """Create and return a list of logical lines from the given pytree.
46
47  Arguments:
48    tree: the top-level pytree node to unwrap..
49
50  Returns:
51    A list of LogicalLine objects.
52  """
53  unwrapper = PyTreeUnwrapper()
54  unwrapper.Visit(tree)
55  llines = unwrapper.GetLogicalLines()
56  llines.sort(key=lambda x: x.lineno)
57  return llines
58
59
60# Grammar tokens considered as whitespace for the purpose of unwrapping.
61_WHITESPACE_TOKENS = frozenset([
62    grammar_token.NEWLINE, grammar_token.DEDENT, grammar_token.INDENT,
63    grammar_token.ENDMARKER
64])
65
66
67class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor):
68  """PyTreeUnwrapper - see file-level docstring for detailed description.
69
70  Note: since this implements PyTreeVisitor and node names in lib2to3 are
71  underscore_separated, the visiting methods of this class are named as
72  Visit_node_name. invalid-name pragmas are added to each such method to silence
73  a style warning. This is forced on us by the usage of lib2to3, and re-munging
74  method names to make them different from actual node names sounded like a
75  confusing and brittle affair that wasn't worth it for this small & controlled
76  deviation from the style guide.
77
78  To understand the connection between visitor methods in this class, some
79  familiarity with the Python grammar is required.
80  """
81
82  def __init__(self):
83    # A list of all logical lines finished visiting so far.
84    self._logical_lines = []
85
86    # Builds up a "current" logical line while visiting pytree nodes. Some nodes
87    # will finish a line and start a new one.
88    self._cur_logical_line = logical_line.LogicalLine(0)
89
90    # Current indentation depth.
91    self._cur_depth = 0
92
93  def GetLogicalLines(self):
94    """Fetch the result of the tree walk.
95
96    Note: only call this after visiting the whole tree.
97
98    Returns:
99      A list of LogicalLine objects.
100    """
101    # Make sure the last line that was being populated is flushed.
102    self._StartNewLine()
103    return self._logical_lines
104
105  def _StartNewLine(self):
106    """Finish current line and start a new one.
107
108    Place the currently accumulated line into the _logical_lines list and
109    start a new one.
110    """
111    if self._cur_logical_line.tokens:
112      self._logical_lines.append(self._cur_logical_line)
113      _MatchBrackets(self._cur_logical_line)
114      _IdentifyParameterLists(self._cur_logical_line)
115      _AdjustSplitPenalty(self._cur_logical_line)
116    self._cur_logical_line = logical_line.LogicalLine(self._cur_depth)
117
118  _STMT_TYPES = frozenset({
119      'if_stmt',
120      'while_stmt',
121      'for_stmt',
122      'try_stmt',
123      'expect_clause',
124      'with_stmt',
125      'funcdef',
126      'classdef',
127  })
128
129  # pylint: disable=invalid-name,missing-docstring
130  def Visit_simple_stmt(self, node):
131    # A 'simple_stmt' conveniently represents a non-compound Python statement,
132    # i.e. a statement that does not contain other statements.
133
134    # When compound nodes have a single statement as their suite, the parser
135    # can leave it in the tree directly without creating a suite. But we have
136    # to increase depth in these cases as well. However, don't increase the
137    # depth of we have a simple_stmt that's a comment node. This represents a
138    # standalone comment and in the case of it coming directly after the
139    # funcdef, it is a "top" comment for the whole function.
140    # TODO(eliben): add more relevant compound statements here.
141    single_stmt_suite = (
142        node.parent and pytree_utils.NodeName(node.parent) in self._STMT_TYPES)
143    is_comment_stmt = pytree_utils.IsCommentStatement(node)
144    if single_stmt_suite and not is_comment_stmt:
145      self._cur_depth += 1
146    self._StartNewLine()
147    self.DefaultNodeVisit(node)
148    if single_stmt_suite and not is_comment_stmt:
149      self._cur_depth -= 1
150
151  def _VisitCompoundStatement(self, node, substatement_names):
152    """Helper for visiting compound statements.
153
154    Python compound statements serve as containers for other statements. Thus,
155    when we encounter a new compound statement, we start a new logical line.
156
157    Arguments:
158      node: the node to visit.
159      substatement_names: set of node names. A compound statement will be
160        recognized as a NAME node with a name in this set.
161    """
162    for child in node.children:
163      # A pytree is structured in such a way that a single 'if_stmt' node will
164      # contain all the 'if', 'elif' and 'else' nodes as children (similar
165      # structure applies to 'while' statements, 'try' blocks, etc). Therefore,
166      # we visit all children here and create a new line before the requested
167      # set of nodes.
168      if (child.type == grammar_token.NAME and
169          child.value in substatement_names):
170        self._StartNewLine()
171      self.Visit(child)
172
173  _IF_STMT_ELEMS = frozenset({'if', 'else', 'elif'})
174
175  def Visit_if_stmt(self, node):  # pylint: disable=invalid-name
176    self._VisitCompoundStatement(node, self._IF_STMT_ELEMS)
177
178  _WHILE_STMT_ELEMS = frozenset({'while', 'else'})
179
180  def Visit_while_stmt(self, node):  # pylint: disable=invalid-name
181    self._VisitCompoundStatement(node, self._WHILE_STMT_ELEMS)
182
183  _FOR_STMT_ELEMS = frozenset({'for', 'else'})
184
185  def Visit_for_stmt(self, node):  # pylint: disable=invalid-name
186    self._VisitCompoundStatement(node, self._FOR_STMT_ELEMS)
187
188  _TRY_STMT_ELEMS = frozenset({'try', 'except', 'else', 'finally'})
189
190  def Visit_try_stmt(self, node):  # pylint: disable=invalid-name
191    self._VisitCompoundStatement(node, self._TRY_STMT_ELEMS)
192
193  _EXCEPT_STMT_ELEMS = frozenset({'except'})
194
195  def Visit_except_clause(self, node):  # pylint: disable=invalid-name
196    self._VisitCompoundStatement(node, self._EXCEPT_STMT_ELEMS)
197
198  _FUNC_DEF_ELEMS = frozenset({'def'})
199
200  def Visit_funcdef(self, node):  # pylint: disable=invalid-name
201    self._VisitCompoundStatement(node, self._FUNC_DEF_ELEMS)
202
203  def Visit_async_funcdef(self, node):  # pylint: disable=invalid-name
204    self._StartNewLine()
205    index = 0
206    for child in node.children:
207      index += 1
208      self.Visit(child)
209      if child.type == grammar_token.ASYNC:
210        break
211    for child in node.children[index].children:
212      self.Visit(child)
213
214  _CLASS_DEF_ELEMS = frozenset({'class'})
215
216  def Visit_classdef(self, node):  # pylint: disable=invalid-name
217    self._VisitCompoundStatement(node, self._CLASS_DEF_ELEMS)
218
219  def Visit_async_stmt(self, node):  # pylint: disable=invalid-name
220    self._StartNewLine()
221    index = 0
222    for child in node.children:
223      index += 1
224      self.Visit(child)
225      if child.type == grammar_token.ASYNC:
226        break
227    for child in node.children[index].children:
228      if child.type == grammar_token.NAME and child.value == 'else':
229        self._StartNewLine()
230      self.Visit(child)
231
232  def Visit_decorator(self, node):  # pylint: disable=invalid-name
233    for child in node.children:
234      self.Visit(child)
235      if child.type == grammar_token.COMMENT and child == node.children[0]:
236        self._StartNewLine()
237
238  def Visit_decorators(self, node):  # pylint: disable=invalid-name
239    for child in node.children:
240      self._StartNewLine()
241      self.Visit(child)
242
243  def Visit_decorated(self, node):  # pylint: disable=invalid-name
244    for child in node.children:
245      self._StartNewLine()
246      self.Visit(child)
247
248  _WITH_STMT_ELEMS = frozenset({'with'})
249
250  def Visit_with_stmt(self, node):  # pylint: disable=invalid-name
251    self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS)
252
253  def Visit_suite(self, node):  # pylint: disable=invalid-name
254    # A 'suite' starts a new indentation level in Python.
255    self._cur_depth += 1
256    self._StartNewLine()
257    self.DefaultNodeVisit(node)
258    self._cur_depth -= 1
259
260  def Visit_listmaker(self, node):  # pylint: disable=invalid-name
261    _DetermineMustSplitAnnotation(node)
262    self.DefaultNodeVisit(node)
263
264  def Visit_dictsetmaker(self, node):  # pylint: disable=invalid-name
265    _DetermineMustSplitAnnotation(node)
266    self.DefaultNodeVisit(node)
267
268  def Visit_import_as_names(self, node):  # pylint: disable=invalid-name
269    if node.prev_sibling.value == '(':
270      _DetermineMustSplitAnnotation(node)
271    self.DefaultNodeVisit(node)
272
273  def Visit_testlist_gexp(self, node):  # pylint: disable=invalid-name
274    _DetermineMustSplitAnnotation(node)
275    self.DefaultNodeVisit(node)
276
277  def Visit_arglist(self, node):  # pylint: disable=invalid-name
278    _DetermineMustSplitAnnotation(node)
279    self.DefaultNodeVisit(node)
280
281  def Visit_typedargslist(self, node):  # pylint: disable=invalid-name
282    _DetermineMustSplitAnnotation(node)
283    self.DefaultNodeVisit(node)
284
285  def DefaultLeafVisit(self, leaf):
286    """Default visitor for tree leaves.
287
288    A tree leaf is always just gets appended to the current logical line.
289
290    Arguments:
291      leaf: the leaf to visit.
292    """
293    if leaf.type in _WHITESPACE_TOKENS:
294      self._StartNewLine()
295    elif leaf.type != grammar_token.COMMENT or leaf.value.strip():
296      # Add non-whitespace tokens and comments that aren't empty.
297      self._cur_logical_line.AppendNode(leaf)
298
299
300_BRACKET_MATCH = {')': '(', '}': '{', ']': '['}
301
302
303def _MatchBrackets(line):
304  """Visit the node and match the brackets.
305
306  For every open bracket ('[', '{', or '('), find the associated closing bracket
307  and "match" them up. I.e., save in the token a pointer to its associated open
308  or close bracket.
309
310  Arguments:
311    line: (LogicalLine) A logical line.
312  """
313  bracket_stack = []
314  for token in line.tokens:
315    if token.value in pytree_utils.OPENING_BRACKETS:
316      bracket_stack.append(token)
317    elif token.value in pytree_utils.CLOSING_BRACKETS:
318      bracket_stack[-1].matching_bracket = token
319      token.matching_bracket = bracket_stack[-1]
320      bracket_stack.pop()
321
322    for bracket in bracket_stack:
323      if id(pytree_utils.GetOpeningBracket(token.node)) == id(bracket.node):
324        bracket.container_elements.append(token)
325        token.container_opening = bracket
326
327
328def _IdentifyParameterLists(line):
329  """Visit the node to create a state for parameter lists.
330
331  For instance, a parameter is considered an "object" with its first and last
332  token uniquely identifying the object.
333
334  Arguments:
335    line: (LogicalLine) A logical line.
336  """
337  func_stack = []
338  param_stack = []
339  for tok in line.tokens:
340    # Identify parameter list objects.
341    if subtypes.FUNC_DEF in tok.subtypes:
342      assert tok.next_token.value == '('
343      func_stack.append(tok.next_token)
344      continue
345
346    if func_stack and tok.value == ')':
347      if tok == func_stack[-1].matching_bracket:
348        func_stack.pop()
349      continue
350
351    # Identify parameter objects.
352    if subtypes.PARAMETER_START in tok.subtypes:
353      param_stack.append(tok)
354
355    # Not "elif", a parameter could be a single token.
356    if param_stack and subtypes.PARAMETER_STOP in tok.subtypes:
357      start = param_stack.pop()
358      func_stack[-1].parameters.append(object_state.Parameter(start, tok))
359
360
361def _AdjustSplitPenalty(line):
362  """Visit the node and adjust the split penalties if needed.
363
364  A token shouldn't be split if it's not within a bracket pair. Mark any token
365  that's not within a bracket pair as "unbreakable".
366
367  Arguments:
368    line: (LogicalLine) An logical line.
369  """
370  bracket_level = 0
371  for index, token in enumerate(line.tokens):
372    if index and not bracket_level:
373      pytree_utils.SetNodeAnnotation(token.node,
374                                     pytree_utils.Annotation.SPLIT_PENALTY,
375                                     split_penalty.UNBREAKABLE)
376    if token.value in pytree_utils.OPENING_BRACKETS:
377      bracket_level += 1
378    elif token.value in pytree_utils.CLOSING_BRACKETS:
379      bracket_level -= 1
380
381
382def _DetermineMustSplitAnnotation(node):
383  """Enforce a split in the list if the list ends with a comma."""
384  if style.Get('DISABLE_ENDING_COMMA_HEURISTIC'):
385    return
386  if not _ContainsComments(node):
387    token = next(node.parent.leaves())
388    if token.value == '(':
389      if sum(1 for ch in node.children if ch.type == grammar_token.COMMA) < 2:
390        return
391    if (not isinstance(node.children[-1], pytree.Leaf) or
392        node.children[-1].value != ','):
393      return
394  num_children = len(node.children)
395  index = 0
396  _SetMustSplitOnFirstLeaf(node.children[0])
397  while index < num_children - 1:
398    child = node.children[index]
399    if isinstance(child, pytree.Leaf) and child.value == ',':
400      next_child = node.children[index + 1]
401      if next_child.type == grammar_token.COMMENT:
402        index += 1
403        if index >= num_children - 1:
404          break
405      _SetMustSplitOnFirstLeaf(node.children[index + 1])
406    index += 1
407
408
409def _ContainsComments(node):
410  """Return True if the list has a comment in it."""
411  if isinstance(node, pytree.Leaf):
412    return node.type == grammar_token.COMMENT
413  for child in node.children:
414    if _ContainsComments(child):
415      return True
416  return False
417
418
419def _SetMustSplitOnFirstLeaf(node):
420  """Set the "must split" annotation on the first leaf node."""
421  pytree_utils.SetNodeAnnotation(
422      pytree_utils.FirstLeafNode(node), pytree_utils.Annotation.MUST_SPLIT,
423      True)
424