• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015-2017 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""PyTreeUnwrapper - produces a list of unwrapped lines from a pytree.
15
16[for a description of what an unwrapped line is, see unwrapped_line.py]
17
18This is a pytree visitor that goes over a parse tree and produces a list of
19UnwrappedLine containers from it, each with its own depth and containing all
20the tokens that could fit on the line if there were no maximal line-length
21limitations.
22
23Note: a precondition to running this visitor and obtaining correct results is
24for the tree to have its comments spliced in as nodes. Prefixes are ignored.
25
26For most uses, the convenience function UnwrapPyTree should be sufficient.
27"""
28
29# The word "token" is overloaded within this module, so for clarity rename
30# the imported pgen2.token module.
31from lib2to3 import pytree
32from lib2to3.pgen2 import token as grammar_token
33
34from yapf.yapflib import pytree_utils
35from yapf.yapflib import pytree_visitor
36from yapf.yapflib import split_penalty
37from yapf.yapflib import unwrapped_line
38
39
40def UnwrapPyTree(tree):
41  """Create and return a list of unwrapped lines from the given pytree.
42
43  Arguments:
44    tree: the top-level pytree node to unwrap.
45
46  Returns:
47    A list of UnwrappedLine objects.
48  """
49  unwrapper = PyTreeUnwrapper()
50  unwrapper.Visit(tree)
51  uwlines = unwrapper.GetUnwrappedLines()
52  uwlines.sort(key=lambda x: x.lineno)
53  return uwlines
54
55
56# Grammar tokens considered as whitespace for the purpose of unwrapping.
57_WHITESPACE_TOKENS = frozenset([
58    grammar_token.NEWLINE, grammar_token.DEDENT, grammar_token.INDENT,
59    grammar_token.ENDMARKER
60])
61
62
63class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor):
64  """PyTreeUnwrapper - see file-level docstring for detailed description.
65
66  Note: since this implements PyTreeVisitor and node names in lib2to3 are
67  underscore_separated, the visiting methods of this class are named as
68  Visit_node_name. invalid-name pragmas are added to each such method to silence
69  a style warning. This is forced on us by the usage of lib2to3, and re-munging
70  method names to make them different from actual node names sounded like a
71  confusing and brittle affair that wasn't worth it for this small & controlled
72  deviation from the style guide.
73
74  To understand the connection between visitor methods in this class, some
75  familiarity with the Python grammar is required.
76  """
77
78  def __init__(self):
79    # A list of all unwrapped lines finished visiting so far.
80    self._unwrapped_lines = []
81
82    # Builds up a "current" unwrapped line while visiting pytree nodes. Some
83    # nodes will finish a line and start a new one.
84    self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(0)
85
86    # Current indentation depth.
87    self._cur_depth = 0
88
89  def GetUnwrappedLines(self):
90    """Fetch the result of the tree walk.
91
92    Note: only call this after visiting the whole tree.
93
94    Returns:
95      A list of UnwrappedLine objects.
96    """
97    # Make sure the last line that was being populated is flushed.
98    self._StartNewLine()
99    return self._unwrapped_lines
100
101  def _StartNewLine(self):
102    """Finish current line and start a new one.
103
104    Place the currently accumulated line into the _unwrapped_lines list and
105    start a new one.
106    """
107    if self._cur_unwrapped_line.tokens:
108      self._unwrapped_lines.append(self._cur_unwrapped_line)
109      _MatchBrackets(self._cur_unwrapped_line)
110      _AdjustSplitPenalty(self._cur_unwrapped_line)
111    self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(self._cur_depth)
112
113  _STMT_TYPES = frozenset({
114      'if_stmt',
115      'while_stmt',
116      'for_stmt',
117      'try_stmt',
118      'expect_clause',
119      'with_stmt',
120      'funcdef',
121      'classdef',
122  })
123
124  # pylint: disable=invalid-name,missing-docstring
125  def Visit_simple_stmt(self, node):
126    # A 'simple_stmt' conveniently represents a non-compound Python statement,
127    # i.e. a statement that does not contain other statements.
128
129    # When compound nodes have a single statement as their suite, the parser
130    # can leave it in the tree directly without creating a suite. But we have
131    # to increase depth in these cases as well. However, don't increase the
132    # depth of we have a simple_stmt that's a comment node. This represents a
133    # standalone comment and in the case of it coming directly after the
134    # funcdef, it is a "top" comment for the whole function.
135    # TODO(eliben): add more relevant compound statements here.
136    single_stmt_suite = (node.parent and
137                         pytree_utils.NodeName(node.parent) in self._STMT_TYPES)
138    is_comment_stmt = pytree_utils.IsCommentStatement(node)
139    if single_stmt_suite and not is_comment_stmt:
140      self._cur_depth += 1
141    self._StartNewLine()
142    self.DefaultNodeVisit(node)
143    if single_stmt_suite and not is_comment_stmt:
144      self._cur_depth -= 1
145
146  def _VisitCompoundStatement(self, node, substatement_names):
147    """Helper for visiting compound statements.
148
149    Python compound statements serve as containers for other statements. Thus,
150    when we encounter a new compound statement we start a new unwrapped line.
151
152    Arguments:
153      node: the node to visit.
154      substatement_names: set of node names. A compound statement will be
155        recognized as a NAME node with a name in this set.
156    """
157    for child in node.children:
158      # A pytree is structured in such a way that a single 'if_stmt' node will
159      # contain all the 'if', 'elif' and 'else' nodes as children (similar
160      # structure applies to 'while' statements, 'try' blocks, etc). Therefore,
161      # we visit all children here and create a new line before the requested
162      # set of nodes.
163      if (child.type == grammar_token.NAME and
164          child.value in substatement_names):
165        self._StartNewLine()
166      self.Visit(child)
167
168  _IF_STMT_ELEMS = frozenset({'if', 'else', 'elif'})
169
170  def Visit_if_stmt(self, node):  # pylint: disable=invalid-name
171    self._VisitCompoundStatement(node, self._IF_STMT_ELEMS)
172
173  _WHILE_STMT_ELEMS = frozenset({'while', 'else'})
174
175  def Visit_while_stmt(self, node):  # pylint: disable=invalid-name
176    self._VisitCompoundStatement(node, self._WHILE_STMT_ELEMS)
177
178  _FOR_STMT_ELEMS = frozenset({'for', 'else'})
179
180  def Visit_for_stmt(self, node):  # pylint: disable=invalid-name
181    self._VisitCompoundStatement(node, self._FOR_STMT_ELEMS)
182
183  _TRY_STMT_ELEMS = frozenset({'try', 'except', 'else', 'finally'})
184
185  def Visit_try_stmt(self, node):  # pylint: disable=invalid-name
186    self._VisitCompoundStatement(node, self._TRY_STMT_ELEMS)
187
188  _EXCEPT_STMT_ELEMS = frozenset({'except'})
189
190  def Visit_except_clause(self, node):  # pylint: disable=invalid-name
191    self._VisitCompoundStatement(node, self._EXCEPT_STMT_ELEMS)
192
193  _FUNC_DEF_ELEMS = frozenset({'def'})
194
195  def Visit_funcdef(self, node):  # pylint: disable=invalid-name
196    self._VisitCompoundStatement(node, self._FUNC_DEF_ELEMS)
197
198  def Visit_async_funcdef(self, node):  # pylint: disable=invalid-name
199    self._StartNewLine()
200    index = 0
201    for child in node.children:
202      index += 1
203      self.Visit(child)
204      if pytree_utils.NodeName(child) == 'ASYNC':
205        break
206    for child in node.children[index].children:
207      self.Visit(child)
208
209  _CLASS_DEF_ELEMS = frozenset({'class'})
210
211  def Visit_classdef(self, node):  # pylint: disable=invalid-name
212    self._VisitCompoundStatement(node, self._CLASS_DEF_ELEMS)
213
214  def Visit_async_stmt(self, node):  # pylint: disable=invalid-name
215    self._StartNewLine()
216    index = 0
217    for child in node.children:
218      index += 1
219      self.Visit(child)
220      if pytree_utils.NodeName(child) == 'ASYNC':
221        break
222    for child in node.children[index].children:
223      self.Visit(child)
224
225  def Visit_decorators(self, node):  # pylint: disable=invalid-name
226    for child in node.children:
227      self._StartNewLine()
228      self.Visit(child)
229
230  def Visit_decorated(self, node):  # pylint: disable=invalid-name
231    for child in node.children:
232      self._StartNewLine()
233      self.Visit(child)
234
235  _WITH_STMT_ELEMS = frozenset({'with'})
236
237  def Visit_with_stmt(self, node):  # pylint: disable=invalid-name
238    self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS)
239
240  def Visit_suite(self, node):  # pylint: disable=invalid-name
241    # A 'suite' starts a new indentation level in Python.
242    self._cur_depth += 1
243    self._StartNewLine()
244    self.DefaultNodeVisit(node)
245    self._cur_depth -= 1
246
247  def Visit_listmaker(self, node):  # pylint: disable=invalid-name
248    _DetermineMustSplitAnnotation(node)
249    self.DefaultNodeVisit(node)
250
251  def Visit_dictsetmaker(self, node):  # pylint: disable=invalid-name
252    _DetermineMustSplitAnnotation(node)
253    self.DefaultNodeVisit(node)
254
255  def Visit_import_as_names(self, node):  # pylint: disable=invalid-name
256    if node.prev_sibling.value == '(':
257      _DetermineMustSplitAnnotation(node)
258    self.DefaultNodeVisit(node)
259
260  def Visit_testlist_gexp(self, node):  # pylint: disable=invalid-name
261    if _ContainsComments(node):
262      _DetermineMustSplitAnnotation(node)
263    self.DefaultNodeVisit(node)
264
265  def Visit_arglist(self, node):  # pylint: disable=invalid-name
266    _DetermineMustSplitAnnotation(node)
267    self.DefaultNodeVisit(node)
268
269  def Visit_typedargslist(self, node):  # pylint: disable=invalid-name
270    _DetermineMustSplitAnnotation(node)
271    self.DefaultNodeVisit(node)
272
273  def DefaultLeafVisit(self, leaf):
274    """Default visitor for tree leaves.
275
276    A tree leaf is always just gets appended to the current unwrapped line.
277
278    Arguments:
279      leaf: the leaf to visit.
280    """
281    if leaf.type in _WHITESPACE_TOKENS:
282      self._StartNewLine()
283    elif leaf.type != grammar_token.COMMENT or leaf.value.strip():
284      if leaf.value == ';':
285        # Split up multiple statements on one line.
286        self._StartNewLine()
287      else:
288        # Add non-whitespace tokens and comments that aren't empty.
289        self._cur_unwrapped_line.AppendNode(leaf)
290
291
292_BRACKET_MATCH = {')': '(', '}': '{', ']': '['}
293
294
295def _MatchBrackets(uwline):
296  """Visit the node and match the brackets.
297
298  For every open bracket ('[', '{', or '('), find the associated closing bracket
299  and "match" them up. I.e., save in the token a pointer to its associated open
300  or close bracket.
301
302  Arguments:
303    uwline: (UnwrappedLine) An unwrapped line.
304  """
305  bracket_stack = []
306  for token in uwline.tokens:
307    if token.value in pytree_utils.OPENING_BRACKETS:
308      bracket_stack.append(token)
309    elif token.value in pytree_utils.CLOSING_BRACKETS:
310      bracket_stack[-1].matching_bracket = token
311      token.matching_bracket = bracket_stack[-1]
312      bracket_stack.pop()
313
314
315def _AdjustSplitPenalty(uwline):
316  """Visit the node and adjust the split penalties if needed.
317
318  A token shouldn't be split if it's not within a bracket pair. Mark any token
319  that's not within a bracket pair as "unbreakable".
320
321  Arguments:
322    uwline: (UnwrappedLine) An unwrapped line.
323  """
324  bracket_level = 0
325  for index, token in enumerate(uwline.tokens):
326    if index and not bracket_level:
327      pytree_utils.SetNodeAnnotation(token.node,
328                                     pytree_utils.Annotation.SPLIT_PENALTY,
329                                     split_penalty.UNBREAKABLE)
330    if token.value in pytree_utils.OPENING_BRACKETS:
331      bracket_level += 1
332    elif token.value in pytree_utils.CLOSING_BRACKETS:
333      bracket_level -= 1
334
335
336def _DetermineMustSplitAnnotation(node):
337  """Enforce a split in the list if the list ends with a comma."""
338  if not _ContainsComments(node):
339    if (not isinstance(node.children[-1], pytree.Leaf) or
340        node.children[-1].value != ','):
341      return
342  num_children = len(node.children)
343  index = 0
344  _SetMustSplitOnFirstLeaf(node.children[0])
345  while index < num_children - 1:
346    child = node.children[index]
347    if isinstance(child, pytree.Leaf) and child.value == ',':
348      next_child = node.children[index + 1]
349      if next_child.type == grammar_token.COMMENT:
350        index += 1
351        if index >= num_children - 1:
352          break
353      _SetMustSplitOnFirstLeaf(node.children[index + 1])
354    index += 1
355
356
357def _ContainsComments(node):
358  """Return True if the list has a comment in it."""
359  if isinstance(node, pytree.Leaf):
360    return node.type == grammar_token.COMMENT
361  for child in node.children:
362    if _ContainsComments(child):
363      return True
364  return False
365
366
367def _SetMustSplitOnFirstLeaf(node):
368  """Set the "must split" annotation on the first leaf node."""
369
370  def FindFirstLeaf(node):
371    if isinstance(node, pytree.Leaf):
372      return node
373    return FindFirstLeaf(node.children[0])
374
375  pytree_utils.SetNodeAnnotation(
376      FindFirstLeaf(node), pytree_utils.Annotation.MUST_SPLIT, True)
377