1# Copyright 2015-2017 Google Inc. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14"""PyTreeUnwrapper - produces a list of unwrapped lines from a pytree. 15 16[for a description of what an unwrapped line is, see unwrapped_line.py] 17 18This is a pytree visitor that goes over a parse tree and produces a list of 19UnwrappedLine containers from it, each with its own depth and containing all 20the tokens that could fit on the line if there were no maximal line-length 21limitations. 22 23Note: a precondition to running this visitor and obtaining correct results is 24for the tree to have its comments spliced in as nodes. Prefixes are ignored. 25 26For most uses, the convenience function UnwrapPyTree should be sufficient. 27""" 28 29# The word "token" is overloaded within this module, so for clarity rename 30# the imported pgen2.token module. 31from lib2to3 import pytree 32from lib2to3.pgen2 import token as grammar_token 33 34from yapf.yapflib import pytree_utils 35from yapf.yapflib import pytree_visitor 36from yapf.yapflib import split_penalty 37from yapf.yapflib import unwrapped_line 38 39 40def UnwrapPyTree(tree): 41 """Create and return a list of unwrapped lines from the given pytree. 42 43 Arguments: 44 tree: the top-level pytree node to unwrap. 45 46 Returns: 47 A list of UnwrappedLine objects. 48 """ 49 unwrapper = PyTreeUnwrapper() 50 unwrapper.Visit(tree) 51 uwlines = unwrapper.GetUnwrappedLines() 52 uwlines.sort(key=lambda x: x.lineno) 53 return uwlines 54 55 56# Grammar tokens considered as whitespace for the purpose of unwrapping. 57_WHITESPACE_TOKENS = frozenset([ 58 grammar_token.NEWLINE, grammar_token.DEDENT, grammar_token.INDENT, 59 grammar_token.ENDMARKER 60]) 61 62 63class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor): 64 """PyTreeUnwrapper - see file-level docstring for detailed description. 65 66 Note: since this implements PyTreeVisitor and node names in lib2to3 are 67 underscore_separated, the visiting methods of this class are named as 68 Visit_node_name. invalid-name pragmas are added to each such method to silence 69 a style warning. This is forced on us by the usage of lib2to3, and re-munging 70 method names to make them different from actual node names sounded like a 71 confusing and brittle affair that wasn't worth it for this small & controlled 72 deviation from the style guide. 73 74 To understand the connection between visitor methods in this class, some 75 familiarity with the Python grammar is required. 76 """ 77 78 def __init__(self): 79 # A list of all unwrapped lines finished visiting so far. 80 self._unwrapped_lines = [] 81 82 # Builds up a "current" unwrapped line while visiting pytree nodes. Some 83 # nodes will finish a line and start a new one. 84 self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(0) 85 86 # Current indentation depth. 87 self._cur_depth = 0 88 89 def GetUnwrappedLines(self): 90 """Fetch the result of the tree walk. 91 92 Note: only call this after visiting the whole tree. 93 94 Returns: 95 A list of UnwrappedLine objects. 96 """ 97 # Make sure the last line that was being populated is flushed. 98 self._StartNewLine() 99 return self._unwrapped_lines 100 101 def _StartNewLine(self): 102 """Finish current line and start a new one. 103 104 Place the currently accumulated line into the _unwrapped_lines list and 105 start a new one. 106 """ 107 if self._cur_unwrapped_line.tokens: 108 self._unwrapped_lines.append(self._cur_unwrapped_line) 109 _MatchBrackets(self._cur_unwrapped_line) 110 _AdjustSplitPenalty(self._cur_unwrapped_line) 111 self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(self._cur_depth) 112 113 _STMT_TYPES = frozenset({ 114 'if_stmt', 115 'while_stmt', 116 'for_stmt', 117 'try_stmt', 118 'expect_clause', 119 'with_stmt', 120 'funcdef', 121 'classdef', 122 }) 123 124 # pylint: disable=invalid-name,missing-docstring 125 def Visit_simple_stmt(self, node): 126 # A 'simple_stmt' conveniently represents a non-compound Python statement, 127 # i.e. a statement that does not contain other statements. 128 129 # When compound nodes have a single statement as their suite, the parser 130 # can leave it in the tree directly without creating a suite. But we have 131 # to increase depth in these cases as well. However, don't increase the 132 # depth of we have a simple_stmt that's a comment node. This represents a 133 # standalone comment and in the case of it coming directly after the 134 # funcdef, it is a "top" comment for the whole function. 135 # TODO(eliben): add more relevant compound statements here. 136 single_stmt_suite = (node.parent and 137 pytree_utils.NodeName(node.parent) in self._STMT_TYPES) 138 is_comment_stmt = pytree_utils.IsCommentStatement(node) 139 if single_stmt_suite and not is_comment_stmt: 140 self._cur_depth += 1 141 self._StartNewLine() 142 self.DefaultNodeVisit(node) 143 if single_stmt_suite and not is_comment_stmt: 144 self._cur_depth -= 1 145 146 def _VisitCompoundStatement(self, node, substatement_names): 147 """Helper for visiting compound statements. 148 149 Python compound statements serve as containers for other statements. Thus, 150 when we encounter a new compound statement we start a new unwrapped line. 151 152 Arguments: 153 node: the node to visit. 154 substatement_names: set of node names. A compound statement will be 155 recognized as a NAME node with a name in this set. 156 """ 157 for child in node.children: 158 # A pytree is structured in such a way that a single 'if_stmt' node will 159 # contain all the 'if', 'elif' and 'else' nodes as children (similar 160 # structure applies to 'while' statements, 'try' blocks, etc). Therefore, 161 # we visit all children here and create a new line before the requested 162 # set of nodes. 163 if (child.type == grammar_token.NAME and 164 child.value in substatement_names): 165 self._StartNewLine() 166 self.Visit(child) 167 168 _IF_STMT_ELEMS = frozenset({'if', 'else', 'elif'}) 169 170 def Visit_if_stmt(self, node): # pylint: disable=invalid-name 171 self._VisitCompoundStatement(node, self._IF_STMT_ELEMS) 172 173 _WHILE_STMT_ELEMS = frozenset({'while', 'else'}) 174 175 def Visit_while_stmt(self, node): # pylint: disable=invalid-name 176 self._VisitCompoundStatement(node, self._WHILE_STMT_ELEMS) 177 178 _FOR_STMT_ELEMS = frozenset({'for', 'else'}) 179 180 def Visit_for_stmt(self, node): # pylint: disable=invalid-name 181 self._VisitCompoundStatement(node, self._FOR_STMT_ELEMS) 182 183 _TRY_STMT_ELEMS = frozenset({'try', 'except', 'else', 'finally'}) 184 185 def Visit_try_stmt(self, node): # pylint: disable=invalid-name 186 self._VisitCompoundStatement(node, self._TRY_STMT_ELEMS) 187 188 _EXCEPT_STMT_ELEMS = frozenset({'except'}) 189 190 def Visit_except_clause(self, node): # pylint: disable=invalid-name 191 self._VisitCompoundStatement(node, self._EXCEPT_STMT_ELEMS) 192 193 _FUNC_DEF_ELEMS = frozenset({'def'}) 194 195 def Visit_funcdef(self, node): # pylint: disable=invalid-name 196 self._VisitCompoundStatement(node, self._FUNC_DEF_ELEMS) 197 198 def Visit_async_funcdef(self, node): # pylint: disable=invalid-name 199 self._StartNewLine() 200 index = 0 201 for child in node.children: 202 index += 1 203 self.Visit(child) 204 if pytree_utils.NodeName(child) == 'ASYNC': 205 break 206 for child in node.children[index].children: 207 self.Visit(child) 208 209 _CLASS_DEF_ELEMS = frozenset({'class'}) 210 211 def Visit_classdef(self, node): # pylint: disable=invalid-name 212 self._VisitCompoundStatement(node, self._CLASS_DEF_ELEMS) 213 214 def Visit_async_stmt(self, node): # pylint: disable=invalid-name 215 self._StartNewLine() 216 index = 0 217 for child in node.children: 218 index += 1 219 self.Visit(child) 220 if pytree_utils.NodeName(child) == 'ASYNC': 221 break 222 for child in node.children[index].children: 223 self.Visit(child) 224 225 def Visit_decorators(self, node): # pylint: disable=invalid-name 226 for child in node.children: 227 self._StartNewLine() 228 self.Visit(child) 229 230 def Visit_decorated(self, node): # pylint: disable=invalid-name 231 for child in node.children: 232 self._StartNewLine() 233 self.Visit(child) 234 235 _WITH_STMT_ELEMS = frozenset({'with'}) 236 237 def Visit_with_stmt(self, node): # pylint: disable=invalid-name 238 self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS) 239 240 def Visit_suite(self, node): # pylint: disable=invalid-name 241 # A 'suite' starts a new indentation level in Python. 242 self._cur_depth += 1 243 self._StartNewLine() 244 self.DefaultNodeVisit(node) 245 self._cur_depth -= 1 246 247 def Visit_listmaker(self, node): # pylint: disable=invalid-name 248 _DetermineMustSplitAnnotation(node) 249 self.DefaultNodeVisit(node) 250 251 def Visit_dictsetmaker(self, node): # pylint: disable=invalid-name 252 _DetermineMustSplitAnnotation(node) 253 self.DefaultNodeVisit(node) 254 255 def Visit_import_as_names(self, node): # pylint: disable=invalid-name 256 if node.prev_sibling.value == '(': 257 _DetermineMustSplitAnnotation(node) 258 self.DefaultNodeVisit(node) 259 260 def Visit_testlist_gexp(self, node): # pylint: disable=invalid-name 261 if _ContainsComments(node): 262 _DetermineMustSplitAnnotation(node) 263 self.DefaultNodeVisit(node) 264 265 def Visit_arglist(self, node): # pylint: disable=invalid-name 266 _DetermineMustSplitAnnotation(node) 267 self.DefaultNodeVisit(node) 268 269 def Visit_typedargslist(self, node): # pylint: disable=invalid-name 270 _DetermineMustSplitAnnotation(node) 271 self.DefaultNodeVisit(node) 272 273 def DefaultLeafVisit(self, leaf): 274 """Default visitor for tree leaves. 275 276 A tree leaf is always just gets appended to the current unwrapped line. 277 278 Arguments: 279 leaf: the leaf to visit. 280 """ 281 if leaf.type in _WHITESPACE_TOKENS: 282 self._StartNewLine() 283 elif leaf.type != grammar_token.COMMENT or leaf.value.strip(): 284 if leaf.value == ';': 285 # Split up multiple statements on one line. 286 self._StartNewLine() 287 else: 288 # Add non-whitespace tokens and comments that aren't empty. 289 self._cur_unwrapped_line.AppendNode(leaf) 290 291 292_BRACKET_MATCH = {')': '(', '}': '{', ']': '['} 293 294 295def _MatchBrackets(uwline): 296 """Visit the node and match the brackets. 297 298 For every open bracket ('[', '{', or '('), find the associated closing bracket 299 and "match" them up. I.e., save in the token a pointer to its associated open 300 or close bracket. 301 302 Arguments: 303 uwline: (UnwrappedLine) An unwrapped line. 304 """ 305 bracket_stack = [] 306 for token in uwline.tokens: 307 if token.value in pytree_utils.OPENING_BRACKETS: 308 bracket_stack.append(token) 309 elif token.value in pytree_utils.CLOSING_BRACKETS: 310 bracket_stack[-1].matching_bracket = token 311 token.matching_bracket = bracket_stack[-1] 312 bracket_stack.pop() 313 314 315def _AdjustSplitPenalty(uwline): 316 """Visit the node and adjust the split penalties if needed. 317 318 A token shouldn't be split if it's not within a bracket pair. Mark any token 319 that's not within a bracket pair as "unbreakable". 320 321 Arguments: 322 uwline: (UnwrappedLine) An unwrapped line. 323 """ 324 bracket_level = 0 325 for index, token in enumerate(uwline.tokens): 326 if index and not bracket_level: 327 pytree_utils.SetNodeAnnotation(token.node, 328 pytree_utils.Annotation.SPLIT_PENALTY, 329 split_penalty.UNBREAKABLE) 330 if token.value in pytree_utils.OPENING_BRACKETS: 331 bracket_level += 1 332 elif token.value in pytree_utils.CLOSING_BRACKETS: 333 bracket_level -= 1 334 335 336def _DetermineMustSplitAnnotation(node): 337 """Enforce a split in the list if the list ends with a comma.""" 338 if not _ContainsComments(node): 339 if (not isinstance(node.children[-1], pytree.Leaf) or 340 node.children[-1].value != ','): 341 return 342 num_children = len(node.children) 343 index = 0 344 _SetMustSplitOnFirstLeaf(node.children[0]) 345 while index < num_children - 1: 346 child = node.children[index] 347 if isinstance(child, pytree.Leaf) and child.value == ',': 348 next_child = node.children[index + 1] 349 if next_child.type == grammar_token.COMMENT: 350 index += 1 351 if index >= num_children - 1: 352 break 353 _SetMustSplitOnFirstLeaf(node.children[index + 1]) 354 index += 1 355 356 357def _ContainsComments(node): 358 """Return True if the list has a comment in it.""" 359 if isinstance(node, pytree.Leaf): 360 return node.type == grammar_token.COMMENT 361 for child in node.children: 362 if _ContainsComments(child): 363 return True 364 return False 365 366 367def _SetMustSplitOnFirstLeaf(node): 368 """Set the "must split" annotation on the first leaf node.""" 369 370 def FindFirstLeaf(node): 371 if isinstance(node, pytree.Leaf): 372 return node 373 return FindFirstLeaf(node.children[0]) 374 375 pytree_utils.SetNodeAnnotation( 376 FindFirstLeaf(node), pytree_utils.Annotation.MUST_SPLIT, True) 377