1# Copyright 2015 Google Inc. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14"""PyTreeUnwrapper - produces a list of logical lines from a pytree. 15 16[for a description of what a logical line is, see logical_line.py] 17 18This is a pytree visitor that goes over a parse tree and produces a list of 19LogicalLine containers from it, each with its own depth and containing all the 20tokens that could fit on the line if there were no maximal line-length 21limitations. 22 23Note: a precondition to running this visitor and obtaining correct results is 24for the tree to have its comments spliced in as nodes. Prefixes are ignored. 25 26For most uses, the convenience function UnwrapPyTree should be sufficient. 27""" 28 29# The word "token" is overloaded within this module, so for clarity rename 30# the imported pgen2.token module. 31from lib2to3 import pytree 32from lib2to3.pgen2 import token as grammar_token 33 34from yapf.yapflib import format_token 35from yapf.yapflib import logical_line 36from yapf.yapflib import object_state 37from yapf.yapflib import pytree_utils 38from yapf.yapflib import pytree_visitor 39from yapf.yapflib import split_penalty 40from yapf.yapflib import style 41from yapf.yapflib import subtypes 42 43 44def UnwrapPyTree(tree): 45 """Create and return a list of logical lines from the given pytree. 46 47 Arguments: 48 tree: the top-level pytree node to unwrap.. 49 50 Returns: 51 A list of LogicalLine objects. 52 """ 53 unwrapper = PyTreeUnwrapper() 54 unwrapper.Visit(tree) 55 llines = unwrapper.GetLogicalLines() 56 llines.sort(key=lambda x: x.lineno) 57 return llines 58 59 60# Grammar tokens considered as whitespace for the purpose of unwrapping. 61_WHITESPACE_TOKENS = frozenset([ 62 grammar_token.NEWLINE, grammar_token.DEDENT, grammar_token.INDENT, 63 grammar_token.ENDMARKER 64]) 65 66 67class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor): 68 """PyTreeUnwrapper - see file-level docstring for detailed description. 69 70 Note: since this implements PyTreeVisitor and node names in lib2to3 are 71 underscore_separated, the visiting methods of this class are named as 72 Visit_node_name. invalid-name pragmas are added to each such method to silence 73 a style warning. This is forced on us by the usage of lib2to3, and re-munging 74 method names to make them different from actual node names sounded like a 75 confusing and brittle affair that wasn't worth it for this small & controlled 76 deviation from the style guide. 77 78 To understand the connection between visitor methods in this class, some 79 familiarity with the Python grammar is required. 80 """ 81 82 def __init__(self): 83 # A list of all logical lines finished visiting so far. 84 self._logical_lines = [] 85 86 # Builds up a "current" logical line while visiting pytree nodes. Some nodes 87 # will finish a line and start a new one. 88 self._cur_logical_line = logical_line.LogicalLine(0) 89 90 # Current indentation depth. 91 self._cur_depth = 0 92 93 def GetLogicalLines(self): 94 """Fetch the result of the tree walk. 95 96 Note: only call this after visiting the whole tree. 97 98 Returns: 99 A list of LogicalLine objects. 100 """ 101 # Make sure the last line that was being populated is flushed. 102 self._StartNewLine() 103 return self._logical_lines 104 105 def _StartNewLine(self): 106 """Finish current line and start a new one. 107 108 Place the currently accumulated line into the _logical_lines list and 109 start a new one. 110 """ 111 if self._cur_logical_line.tokens: 112 self._logical_lines.append(self._cur_logical_line) 113 _MatchBrackets(self._cur_logical_line) 114 _IdentifyParameterLists(self._cur_logical_line) 115 _AdjustSplitPenalty(self._cur_logical_line) 116 self._cur_logical_line = logical_line.LogicalLine(self._cur_depth) 117 118 _STMT_TYPES = frozenset({ 119 'if_stmt', 120 'while_stmt', 121 'for_stmt', 122 'try_stmt', 123 'expect_clause', 124 'with_stmt', 125 'funcdef', 126 'classdef', 127 }) 128 129 # pylint: disable=invalid-name,missing-docstring 130 def Visit_simple_stmt(self, node): 131 # A 'simple_stmt' conveniently represents a non-compound Python statement, 132 # i.e. a statement that does not contain other statements. 133 134 # When compound nodes have a single statement as their suite, the parser 135 # can leave it in the tree directly without creating a suite. But we have 136 # to increase depth in these cases as well. However, don't increase the 137 # depth of we have a simple_stmt that's a comment node. This represents a 138 # standalone comment and in the case of it coming directly after the 139 # funcdef, it is a "top" comment for the whole function. 140 # TODO(eliben): add more relevant compound statements here. 141 single_stmt_suite = ( 142 node.parent and pytree_utils.NodeName(node.parent) in self._STMT_TYPES) 143 is_comment_stmt = pytree_utils.IsCommentStatement(node) 144 if single_stmt_suite and not is_comment_stmt: 145 self._cur_depth += 1 146 self._StartNewLine() 147 self.DefaultNodeVisit(node) 148 if single_stmt_suite and not is_comment_stmt: 149 self._cur_depth -= 1 150 151 def _VisitCompoundStatement(self, node, substatement_names): 152 """Helper for visiting compound statements. 153 154 Python compound statements serve as containers for other statements. Thus, 155 when we encounter a new compound statement, we start a new logical line. 156 157 Arguments: 158 node: the node to visit. 159 substatement_names: set of node names. A compound statement will be 160 recognized as a NAME node with a name in this set. 161 """ 162 for child in node.children: 163 # A pytree is structured in such a way that a single 'if_stmt' node will 164 # contain all the 'if', 'elif' and 'else' nodes as children (similar 165 # structure applies to 'while' statements, 'try' blocks, etc). Therefore, 166 # we visit all children here and create a new line before the requested 167 # set of nodes. 168 if (child.type == grammar_token.NAME and 169 child.value in substatement_names): 170 self._StartNewLine() 171 self.Visit(child) 172 173 _IF_STMT_ELEMS = frozenset({'if', 'else', 'elif'}) 174 175 def Visit_if_stmt(self, node): # pylint: disable=invalid-name 176 self._VisitCompoundStatement(node, self._IF_STMT_ELEMS) 177 178 _WHILE_STMT_ELEMS = frozenset({'while', 'else'}) 179 180 def Visit_while_stmt(self, node): # pylint: disable=invalid-name 181 self._VisitCompoundStatement(node, self._WHILE_STMT_ELEMS) 182 183 _FOR_STMT_ELEMS = frozenset({'for', 'else'}) 184 185 def Visit_for_stmt(self, node): # pylint: disable=invalid-name 186 self._VisitCompoundStatement(node, self._FOR_STMT_ELEMS) 187 188 _TRY_STMT_ELEMS = frozenset({'try', 'except', 'else', 'finally'}) 189 190 def Visit_try_stmt(self, node): # pylint: disable=invalid-name 191 self._VisitCompoundStatement(node, self._TRY_STMT_ELEMS) 192 193 _EXCEPT_STMT_ELEMS = frozenset({'except'}) 194 195 def Visit_except_clause(self, node): # pylint: disable=invalid-name 196 self._VisitCompoundStatement(node, self._EXCEPT_STMT_ELEMS) 197 198 _FUNC_DEF_ELEMS = frozenset({'def'}) 199 200 def Visit_funcdef(self, node): # pylint: disable=invalid-name 201 self._VisitCompoundStatement(node, self._FUNC_DEF_ELEMS) 202 203 def Visit_async_funcdef(self, node): # pylint: disable=invalid-name 204 self._StartNewLine() 205 index = 0 206 for child in node.children: 207 index += 1 208 self.Visit(child) 209 if child.type == grammar_token.ASYNC: 210 break 211 for child in node.children[index].children: 212 self.Visit(child) 213 214 _CLASS_DEF_ELEMS = frozenset({'class'}) 215 216 def Visit_classdef(self, node): # pylint: disable=invalid-name 217 self._VisitCompoundStatement(node, self._CLASS_DEF_ELEMS) 218 219 def Visit_async_stmt(self, node): # pylint: disable=invalid-name 220 self._StartNewLine() 221 index = 0 222 for child in node.children: 223 index += 1 224 self.Visit(child) 225 if child.type == grammar_token.ASYNC: 226 break 227 for child in node.children[index].children: 228 if child.type == grammar_token.NAME and child.value == 'else': 229 self._StartNewLine() 230 self.Visit(child) 231 232 def Visit_decorator(self, node): # pylint: disable=invalid-name 233 for child in node.children: 234 self.Visit(child) 235 if child.type == grammar_token.COMMENT and child == node.children[0]: 236 self._StartNewLine() 237 238 def Visit_decorators(self, node): # pylint: disable=invalid-name 239 for child in node.children: 240 self._StartNewLine() 241 self.Visit(child) 242 243 def Visit_decorated(self, node): # pylint: disable=invalid-name 244 for child in node.children: 245 self._StartNewLine() 246 self.Visit(child) 247 248 _WITH_STMT_ELEMS = frozenset({'with'}) 249 250 def Visit_with_stmt(self, node): # pylint: disable=invalid-name 251 self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS) 252 253 def Visit_suite(self, node): # pylint: disable=invalid-name 254 # A 'suite' starts a new indentation level in Python. 255 self._cur_depth += 1 256 self._StartNewLine() 257 self.DefaultNodeVisit(node) 258 self._cur_depth -= 1 259 260 def Visit_listmaker(self, node): # pylint: disable=invalid-name 261 _DetermineMustSplitAnnotation(node) 262 self.DefaultNodeVisit(node) 263 264 def Visit_dictsetmaker(self, node): # pylint: disable=invalid-name 265 _DetermineMustSplitAnnotation(node) 266 self.DefaultNodeVisit(node) 267 268 def Visit_import_as_names(self, node): # pylint: disable=invalid-name 269 if node.prev_sibling.value == '(': 270 _DetermineMustSplitAnnotation(node) 271 self.DefaultNodeVisit(node) 272 273 def Visit_testlist_gexp(self, node): # pylint: disable=invalid-name 274 _DetermineMustSplitAnnotation(node) 275 self.DefaultNodeVisit(node) 276 277 def Visit_arglist(self, node): # pylint: disable=invalid-name 278 _DetermineMustSplitAnnotation(node) 279 self.DefaultNodeVisit(node) 280 281 def Visit_typedargslist(self, node): # pylint: disable=invalid-name 282 _DetermineMustSplitAnnotation(node) 283 self.DefaultNodeVisit(node) 284 285 def DefaultLeafVisit(self, leaf): 286 """Default visitor for tree leaves. 287 288 A tree leaf is always just gets appended to the current logical line. 289 290 Arguments: 291 leaf: the leaf to visit. 292 """ 293 if leaf.type in _WHITESPACE_TOKENS: 294 self._StartNewLine() 295 elif leaf.type != grammar_token.COMMENT or leaf.value.strip(): 296 # Add non-whitespace tokens and comments that aren't empty. 297 self._cur_logical_line.AppendNode(leaf) 298 299 300_BRACKET_MATCH = {')': '(', '}': '{', ']': '['} 301 302 303def _MatchBrackets(line): 304 """Visit the node and match the brackets. 305 306 For every open bracket ('[', '{', or '('), find the associated closing bracket 307 and "match" them up. I.e., save in the token a pointer to its associated open 308 or close bracket. 309 310 Arguments: 311 line: (LogicalLine) A logical line. 312 """ 313 bracket_stack = [] 314 for token in line.tokens: 315 if token.value in pytree_utils.OPENING_BRACKETS: 316 bracket_stack.append(token) 317 elif token.value in pytree_utils.CLOSING_BRACKETS: 318 bracket_stack[-1].matching_bracket = token 319 token.matching_bracket = bracket_stack[-1] 320 bracket_stack.pop() 321 322 for bracket in bracket_stack: 323 if id(pytree_utils.GetOpeningBracket(token.node)) == id(bracket.node): 324 bracket.container_elements.append(token) 325 token.container_opening = bracket 326 327 328def _IdentifyParameterLists(line): 329 """Visit the node to create a state for parameter lists. 330 331 For instance, a parameter is considered an "object" with its first and last 332 token uniquely identifying the object. 333 334 Arguments: 335 line: (LogicalLine) A logical line. 336 """ 337 func_stack = [] 338 param_stack = [] 339 for tok in line.tokens: 340 # Identify parameter list objects. 341 if subtypes.FUNC_DEF in tok.subtypes: 342 assert tok.next_token.value == '(' 343 func_stack.append(tok.next_token) 344 continue 345 346 if func_stack and tok.value == ')': 347 if tok == func_stack[-1].matching_bracket: 348 func_stack.pop() 349 continue 350 351 # Identify parameter objects. 352 if subtypes.PARAMETER_START in tok.subtypes: 353 param_stack.append(tok) 354 355 # Not "elif", a parameter could be a single token. 356 if param_stack and subtypes.PARAMETER_STOP in tok.subtypes: 357 start = param_stack.pop() 358 func_stack[-1].parameters.append(object_state.Parameter(start, tok)) 359 360 361def _AdjustSplitPenalty(line): 362 """Visit the node and adjust the split penalties if needed. 363 364 A token shouldn't be split if it's not within a bracket pair. Mark any token 365 that's not within a bracket pair as "unbreakable". 366 367 Arguments: 368 line: (LogicalLine) An logical line. 369 """ 370 bracket_level = 0 371 for index, token in enumerate(line.tokens): 372 if index and not bracket_level: 373 pytree_utils.SetNodeAnnotation(token.node, 374 pytree_utils.Annotation.SPLIT_PENALTY, 375 split_penalty.UNBREAKABLE) 376 if token.value in pytree_utils.OPENING_BRACKETS: 377 bracket_level += 1 378 elif token.value in pytree_utils.CLOSING_BRACKETS: 379 bracket_level -= 1 380 381 382def _DetermineMustSplitAnnotation(node): 383 """Enforce a split in the list if the list ends with a comma.""" 384 if style.Get('DISABLE_ENDING_COMMA_HEURISTIC'): 385 return 386 if not _ContainsComments(node): 387 token = next(node.parent.leaves()) 388 if token.value == '(': 389 if sum(1 for ch in node.children if ch.type == grammar_token.COMMA) < 2: 390 return 391 if (not isinstance(node.children[-1], pytree.Leaf) or 392 node.children[-1].value != ','): 393 return 394 num_children = len(node.children) 395 index = 0 396 _SetMustSplitOnFirstLeaf(node.children[0]) 397 while index < num_children - 1: 398 child = node.children[index] 399 if isinstance(child, pytree.Leaf) and child.value == ',': 400 next_child = node.children[index + 1] 401 if next_child.type == grammar_token.COMMENT: 402 index += 1 403 if index >= num_children - 1: 404 break 405 _SetMustSplitOnFirstLeaf(node.children[index + 1]) 406 index += 1 407 408 409def _ContainsComments(node): 410 """Return True if the list has a comment in it.""" 411 if isinstance(node, pytree.Leaf): 412 return node.type == grammar_token.COMMENT 413 for child in node.children: 414 if _ContainsComments(child): 415 return True 416 return False 417 418 419def _SetMustSplitOnFirstLeaf(node): 420 """Set the "must split" annotation on the first leaf node.""" 421 pytree_utils.SetNodeAnnotation( 422 pytree_utils.FirstLeafNode(node), pytree_utils.Annotation.MUST_SPLIT, 423 True) 424