1# Copyright 2015 Google Inc. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14"""Decide what the format for the code should be. 15 16The `unwrapped_line.UnwrappedLine`s are now ready to be formatted. 17UnwrappedLines that can be merged together are. The best formatting is returned 18as a string. 19 20 Reformat(): the main function exported by this module. 21""" 22 23from __future__ import unicode_literals 24import collections 25import heapq 26import re 27 28from lib2to3 import pytree 29from lib2to3.pgen2 import token 30 31from yapf.yapflib import format_decision_state 32from yapf.yapflib import format_token 33from yapf.yapflib import line_joiner 34from yapf.yapflib import pytree_utils 35from yapf.yapflib import style 36from yapf.yapflib import verifier 37 38 39def Reformat(uwlines, verify=False, lines=None): 40 """Reformat the unwrapped lines. 41 42 Arguments: 43 uwlines: (list of unwrapped_line.UnwrappedLine) Lines we want to format. 44 verify: (bool) True if reformatted code should be verified for syntax. 45 lines: (set of int) The lines which can be modified or None if there is no 46 line range restriction. 47 48 Returns: 49 A string representing the reformatted code. 50 """ 51 final_lines = [] 52 prev_uwline = None # The previous line. 53 indent_width = style.Get('INDENT_WIDTH') 54 55 for uwline in _SingleOrMergedLines(uwlines): 56 first_token = uwline.first 57 _FormatFirstToken(first_token, uwline.depth, prev_uwline, final_lines) 58 59 indent_amt = indent_width * uwline.depth 60 state = format_decision_state.FormatDecisionState(uwline, indent_amt) 61 state.MoveStateToNextToken() 62 63 if not uwline.disable: 64 if uwline.first.is_comment: 65 uwline.first.node.value = uwline.first.node.value.rstrip() 66 elif uwline.last.is_comment: 67 uwline.last.node.value = uwline.last.node.value.rstrip() 68 if prev_uwline and prev_uwline.disable: 69 # Keep the vertical spacing between a disabled and enabled formatting 70 # region. 71 _RetainRequiredVerticalSpacingBetweenTokens(uwline.first, 72 prev_uwline.last, lines) 73 if any(tok.is_comment for tok in uwline.tokens): 74 _RetainVerticalSpacingBeforeComments(uwline) 75 76 if (_LineContainsI18n(uwline) or uwline.disable or 77 _LineHasContinuationMarkers(uwline)): 78 _RetainHorizontalSpacing(uwline) 79 _RetainRequiredVerticalSpacing(uwline, prev_uwline, lines) 80 _EmitLineUnformatted(state) 81 elif _CanPlaceOnSingleLine(uwline) and not any(tok.must_split 82 for tok in uwline.tokens): 83 # The unwrapped line fits on one line. 84 while state.next_token: 85 state.AddTokenToState(newline=False, dry_run=False) 86 else: 87 if not _AnalyzeSolutionSpace(state): 88 # Failsafe mode. If there isn't a solution to the line, then just emit 89 # it as is. 90 state = format_decision_state.FormatDecisionState(uwline, indent_amt) 91 state.MoveStateToNextToken() 92 _RetainHorizontalSpacing(uwline) 93 _RetainRequiredVerticalSpacing(uwline, prev_uwline, None) 94 _EmitLineUnformatted(state) 95 96 final_lines.append(uwline) 97 prev_uwline = uwline 98 return _FormatFinalLines(final_lines, verify) 99 100 101def _RetainHorizontalSpacing(uwline): 102 """Retain all horizontal spacing between tokens.""" 103 for tok in uwline.tokens: 104 tok.RetainHorizontalSpacing(uwline.first.column, uwline.depth) 105 106 107def _RetainRequiredVerticalSpacing(cur_uwline, prev_uwline, lines): 108 prev_tok = None 109 if prev_uwline is not None: 110 prev_tok = prev_uwline.last 111 for cur_tok in cur_uwline.tokens: 112 _RetainRequiredVerticalSpacingBetweenTokens(cur_tok, prev_tok, lines) 113 prev_tok = cur_tok 114 115 116def _RetainRequiredVerticalSpacingBetweenTokens(cur_tok, prev_tok, lines): 117 """Retain vertical spacing between two tokens if not in editable range.""" 118 if prev_tok is None: 119 return 120 121 if prev_tok.is_string: 122 prev_lineno = prev_tok.lineno + prev_tok.value.count('\n') 123 elif prev_tok.is_pseudo_paren: 124 if not prev_tok.previous_token.is_multiline_string: 125 prev_lineno = prev_tok.previous_token.lineno 126 else: 127 prev_lineno = prev_tok.lineno 128 else: 129 prev_lineno = prev_tok.lineno 130 131 if cur_tok.is_comment: 132 cur_lineno = cur_tok.lineno - cur_tok.value.count('\n') 133 else: 134 cur_lineno = cur_tok.lineno 135 136 if prev_tok.value.endswith('\\'): 137 prev_lineno += prev_tok.value.count('\n') 138 139 required_newlines = cur_lineno - prev_lineno 140 if cur_tok.is_comment and not prev_tok.is_comment: 141 # Don't adjust between a comment and non-comment. 142 pass 143 elif lines and (cur_lineno in lines or prev_lineno in lines): 144 desired_newlines = cur_tok.whitespace_prefix.count('\n') 145 whitespace_lines = range(prev_lineno + 1, cur_lineno) 146 deletable_lines = len(lines.intersection(whitespace_lines)) 147 required_newlines = max(required_newlines - deletable_lines, 148 desired_newlines) 149 150 cur_tok.AdjustNewlinesBefore(required_newlines) 151 152 153def _RetainVerticalSpacingBeforeComments(uwline): 154 """Retain vertical spacing before comments.""" 155 prev_token = None 156 for tok in uwline.tokens: 157 if tok.is_comment and prev_token: 158 if tok.lineno - tok.value.count('\n') - prev_token.lineno > 1: 159 tok.AdjustNewlinesBefore(ONE_BLANK_LINE) 160 161 prev_token = tok 162 163 164def _EmitLineUnformatted(state): 165 """Emit the line without formatting. 166 167 The line contains code that if reformatted would break a non-syntactic 168 convention. E.g., i18n comments and function calls are tightly bound by 169 convention. Instead, we calculate when / if a newline should occur and honor 170 that. But otherwise the code emitted will be the same as the original code. 171 172 Arguments: 173 state: (format_decision_state.FormatDecisionState) The format decision 174 state. 175 """ 176 prev_lineno = None 177 while state.next_token: 178 previous_token = state.next_token.previous_token 179 previous_lineno = previous_token.lineno 180 181 if previous_token.is_multiline_string or previous_token.is_string: 182 previous_lineno += previous_token.value.count('\n') 183 184 if previous_token.is_continuation: 185 newline = False 186 else: 187 newline = ( 188 prev_lineno is not None and state.next_token.lineno > previous_lineno) 189 190 prev_lineno = state.next_token.lineno 191 state.AddTokenToState(newline=newline, dry_run=False) 192 193 194def _LineContainsI18n(uwline): 195 """Return true if there are i18n comments or function calls in the line. 196 197 I18n comments and pseudo-function calls are closely related. They cannot 198 be moved apart without breaking i18n. 199 200 Arguments: 201 uwline: (unwrapped_line.UnwrappedLine) The line currently being formatted. 202 203 Returns: 204 True if the line contains i18n comments or function calls. False otherwise. 205 """ 206 if style.Get('I18N_COMMENT'): 207 for tok in uwline.tokens: 208 if tok.is_comment and re.match(style.Get('I18N_COMMENT'), tok.value): 209 # Contains an i18n comment. 210 return True 211 212 if style.Get('I18N_FUNCTION_CALL'): 213 length = len(uwline.tokens) 214 index = 0 215 while index < length - 1: 216 if (uwline.tokens[index + 1].value == '(' and 217 uwline.tokens[index].value in style.Get('I18N_FUNCTION_CALL')): 218 return True 219 index += 1 220 221 return False 222 223 224def _LineHasContinuationMarkers(uwline): 225 """Return true if the line has continuation markers in it.""" 226 return any(tok.is_continuation for tok in uwline.tokens) 227 228 229def _CanPlaceOnSingleLine(uwline): 230 """Determine if the unwrapped line can go on a single line. 231 232 Arguments: 233 uwline: (unwrapped_line.UnwrappedLine) The line currently being formatted. 234 235 Returns: 236 True if the line can or should be added to a single line. False otherwise. 237 """ 238 indent_amt = style.Get('INDENT_WIDTH') * uwline.depth 239 last = uwline.last 240 last_index = -1 241 if last.is_pylint_comment or last.is_pytype_comment: 242 last = last.previous_token 243 last_index = -2 244 if last is None: 245 return True 246 return (last.total_length + indent_amt <= style.Get('COLUMN_LIMIT') and 247 not any(tok.is_comment for tok in uwline.tokens[:last_index])) 248 249 250def _FormatFinalLines(final_lines, verify): 251 """Compose the final output from the finalized lines.""" 252 formatted_code = [] 253 for line in final_lines: 254 formatted_line = [] 255 for tok in line.tokens: 256 if not tok.is_pseudo_paren: 257 formatted_line.append(tok.whitespace_prefix) 258 formatted_line.append(tok.value) 259 else: 260 if (not tok.next_token.whitespace_prefix.startswith('\n') and 261 not tok.next_token.whitespace_prefix.startswith(' ')): 262 if (tok.previous_token.value == ':' or 263 tok.next_token.value not in ',}])'): 264 formatted_line.append(' ') 265 266 formatted_code.append(''.join(formatted_line)) 267 if verify: 268 verifier.VerifyCode(formatted_code[-1]) 269 270 return ''.join(formatted_code) + '\n' 271 272 273class _StateNode(object): 274 """An edge in the solution space from 'previous.state' to 'state'. 275 276 Attributes: 277 state: (format_decision_state.FormatDecisionState) The format decision state 278 for this node. 279 newline: If True, then on the edge from 'previous.state' to 'state' a 280 newline is inserted. 281 previous: (_StateNode) The previous state node in the graph. 282 """ 283 284 # TODO(morbo): Add a '__cmp__' method. 285 286 def __init__(self, state, newline, previous): 287 self.state = state.Clone() 288 self.newline = newline 289 self.previous = previous 290 291 def __repr__(self): # pragma: no cover 292 return 'StateNode(state=[\n{0}\n], newline={1})'.format( 293 self.state, self.newline) 294 295 296# A tuple of (penalty, count) that is used to prioritize the BFS. In case of 297# equal penalties, we prefer states that were inserted first. During state 298# generation, we make sure that we insert states first that break the line as 299# late as possible. 300_OrderedPenalty = collections.namedtuple('OrderedPenalty', ['penalty', 'count']) 301 302# An item in the prioritized BFS search queue. The 'StateNode's 'state' has 303# the given '_OrderedPenalty'. 304_QueueItem = collections.namedtuple('QueueItem', 305 ['ordered_penalty', 'state_node']) 306 307 308def _AnalyzeSolutionSpace(initial_state): 309 """Analyze the entire solution space starting from initial_state. 310 311 This implements a variant of Dijkstra's algorithm on the graph that spans 312 the solution space (LineStates are the nodes). The algorithm tries to find 313 the shortest path (the one with the lowest penalty) from 'initial_state' to 314 the state where all tokens are placed. 315 316 Arguments: 317 initial_state: (format_decision_state.FormatDecisionState) The initial state 318 to start the search from. 319 320 Returns: 321 True if a formatting solution was found. False otherwise. 322 """ 323 count = 0 324 seen = set() 325 p_queue = [] 326 327 # Insert start element. 328 node = _StateNode(initial_state, False, None) 329 heapq.heappush(p_queue, _QueueItem(_OrderedPenalty(0, count), node)) 330 331 count += 1 332 while p_queue: 333 item = p_queue[0] 334 penalty = item.ordered_penalty.penalty 335 node = item.state_node 336 if not node.state.next_token: 337 break 338 heapq.heappop(p_queue) 339 340 if count > 10000: 341 node.state.ignore_stack_for_comparison = True 342 343 if node.state in seen: 344 continue 345 346 seen.add(node.state) 347 348 # FIXME(morbo): Add a 'decision' element? 349 350 count = _AddNextStateToQueue(penalty, node, False, count, p_queue) 351 count = _AddNextStateToQueue(penalty, node, True, count, p_queue) 352 353 if not p_queue: 354 # We weren't able to find a solution. Do nothing. 355 return False 356 357 _ReconstructPath(initial_state, heapq.heappop(p_queue).state_node) 358 return True 359 360 361def _AddNextStateToQueue(penalty, previous_node, newline, count, p_queue): 362 """Add the following state to the analysis queue. 363 364 Assume the current state is 'previous_node' and has been reached with a 365 penalty of 'penalty'. Insert a line break if 'newline' is True. 366 367 Arguments: 368 penalty: (int) The penalty associated with the path up to this point. 369 previous_node: (_StateNode) The last _StateNode inserted into the priority 370 queue. 371 newline: (bool) Add a newline if True. 372 count: (int) The number of elements in the queue. 373 p_queue: (heapq) The priority queue representing the solution space. 374 375 Returns: 376 The updated number of elements in the queue. 377 """ 378 must_split = previous_node.state.MustSplit() 379 if newline and not previous_node.state.CanSplit(must_split): 380 # Don't add a newline if the token cannot be split. 381 return count 382 if not newline and must_split: 383 # Don't add a token we must split but where we aren't splitting. 384 return count 385 386 node = _StateNode(previous_node.state, newline, previous_node) 387 penalty += node.state.AddTokenToState( 388 newline=newline, dry_run=True, must_split=must_split) 389 heapq.heappush(p_queue, _QueueItem(_OrderedPenalty(penalty, count), node)) 390 return count + 1 391 392 393def _ReconstructPath(initial_state, current): 394 """Reconstruct the path through the queue with lowest penalty. 395 396 Arguments: 397 initial_state: (format_decision_state.FormatDecisionState) The initial state 398 to start the search from. 399 current: (_StateNode) The node in the decision graph that is the end point 400 of the path with the least penalty. 401 """ 402 path = collections.deque() 403 404 while current.previous: 405 path.appendleft(current) 406 current = current.previous 407 408 for node in path: 409 initial_state.AddTokenToState(newline=node.newline, dry_run=False) 410 411 412def _FormatFirstToken(first_token, indent_depth, prev_uwline, final_lines): 413 """Format the first token in the unwrapped line. 414 415 Add a newline and the required indent before the first token of the unwrapped 416 line. 417 418 Arguments: 419 first_token: (format_token.FormatToken) The first token in the unwrapped 420 line. 421 indent_depth: (int) The line's indentation depth. 422 prev_uwline: (list of unwrapped_line.UnwrappedLine) The unwrapped line 423 previous to this line. 424 final_lines: (list of unwrapped_line.UnwrappedLine) The unwrapped lines 425 that have already been processed. 426 """ 427 first_token.AddWhitespacePrefix( 428 _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline, 429 final_lines), 430 indent_level=indent_depth) 431 432 433NO_BLANK_LINES = 1 434ONE_BLANK_LINE = 2 435TWO_BLANK_LINES = 3 436 437 438def _IsClassOrDef(uwline): 439 if uwline.first.value in {'class', 'def'}: 440 return True 441 442 return [t.value for t in uwline.tokens[:2]] == ['async', 'def'] 443 444 445def _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline, 446 final_lines): 447 """Calculate the number of newlines we need to add. 448 449 Arguments: 450 first_token: (format_token.FormatToken) The first token in the unwrapped 451 line. 452 indent_depth: (int) The line's indentation depth. 453 prev_uwline: (list of unwrapped_line.UnwrappedLine) The unwrapped line 454 previous to this line. 455 final_lines: (list of unwrapped_line.UnwrappedLine) The unwrapped lines 456 that have already been processed. 457 458 Returns: 459 The number of newlines needed before the first token. 460 """ 461 # TODO(morbo): Special handling for imports. 462 # TODO(morbo): Create a knob that can tune these. 463 if prev_uwline is None: 464 # The first line in the file. Don't add blank lines. 465 # FIXME(morbo): Is this correct? 466 if first_token.newlines is not None: 467 pytree_utils.SetNodeAnnotation(first_token.node, 468 pytree_utils.Annotation.NEWLINES, None) 469 return 0 470 471 if first_token.is_docstring: 472 if (prev_uwline.first.value == 'class' and 473 style.Get('BLANK_LINE_BEFORE_CLASS_DOCSTRING')): 474 # Enforce a blank line before a class's docstring. 475 return ONE_BLANK_LINE 476 elif (prev_uwline.first.value.startswith('#') and 477 style.Get('BLANK_LINE_BEFORE_MODULE_DOCSTRING')): 478 # Enforce a blank line before a module's docstring. 479 return ONE_BLANK_LINE 480 # The docstring shouldn't have a newline before it. 481 return NO_BLANK_LINES 482 483 prev_last_token = prev_uwline.last 484 if prev_last_token.is_docstring: 485 if (not indent_depth and first_token.value in {'class', 'def', 'async'}): 486 # Separate a class or function from the module-level docstring with 487 # appropriate number of blank lines. 488 return 1 + style.Get('BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION') 489 if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token, 490 prev_last_token): 491 return NO_BLANK_LINES 492 else: 493 return ONE_BLANK_LINE 494 495 if first_token.value in {'class', 'def', 'async', '@'}: 496 # TODO(morbo): This can go once the blank line calculator is more 497 # sophisticated. 498 if not indent_depth: 499 # This is a top-level class or function. 500 is_inline_comment = prev_last_token.whitespace_prefix.count('\n') == 0 501 if (not prev_uwline.disable and prev_last_token.is_comment and 502 not is_inline_comment): 503 # This token follows a non-inline comment. 504 if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token, 505 prev_last_token): 506 # Assume that the comment is "attached" to the current line. 507 # Therefore, we want two blank lines before the comment. 508 index = len(final_lines) - 1 509 while index > 0: 510 if not final_lines[index - 1].is_comment: 511 break 512 index -= 1 513 if final_lines[index - 1].first.value == '@': 514 final_lines[index].first.AdjustNewlinesBefore(NO_BLANK_LINES) 515 else: 516 prev_last_token.AdjustNewlinesBefore( 517 1 + style.Get('BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION')) 518 if first_token.newlines is not None: 519 pytree_utils.SetNodeAnnotation( 520 first_token.node, pytree_utils.Annotation.NEWLINES, None) 521 return NO_BLANK_LINES 522 elif _IsClassOrDef(prev_uwline): 523 if not style.Get('BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF'): 524 pytree_utils.SetNodeAnnotation(first_token.node, 525 pytree_utils.Annotation.NEWLINES, None) 526 return NO_BLANK_LINES 527 528 # Calculate how many newlines were between the original lines. We want to 529 # retain that formatting if it doesn't violate one of the style guide rules. 530 if first_token.is_comment: 531 first_token_lineno = first_token.lineno - first_token.value.count('\n') 532 else: 533 first_token_lineno = first_token.lineno 534 535 prev_last_token_lineno = prev_last_token.lineno 536 if prev_last_token.is_multiline_string: 537 prev_last_token_lineno += prev_last_token.value.count('\n') 538 539 if first_token_lineno - prev_last_token_lineno > 1: 540 return ONE_BLANK_LINE 541 542 return NO_BLANK_LINES 543 544 545def _SingleOrMergedLines(uwlines): 546 """Generate the lines we want to format. 547 548 Arguments: 549 uwlines: (list of unwrapped_line.UnwrappedLine) Lines we want to format. 550 551 Yields: 552 Either a single line, if the current line cannot be merged with the 553 succeeding line, or the next two lines merged into one line. 554 """ 555 index = 0 556 last_was_merged = False 557 while index < len(uwlines): 558 if uwlines[index].disable: 559 uwline = uwlines[index] 560 index += 1 561 while index < len(uwlines): 562 column = uwline.last.column + 2 563 if uwlines[index].lineno != uwline.lineno: 564 break 565 if uwline.last.value != ':': 566 leaf = pytree.Leaf( 567 type=token.SEMI, value=';', context=('', (uwline.lineno, column))) 568 uwline.AppendToken(format_token.FormatToken(leaf)) 569 for tok in uwlines[index].tokens: 570 uwline.AppendToken(tok) 571 index += 1 572 yield uwline 573 elif line_joiner.CanMergeMultipleLines(uwlines[index:], last_was_merged): 574 # TODO(morbo): This splice is potentially very slow. Come up with a more 575 # performance-friendly way of determining if two lines can be merged. 576 next_uwline = uwlines[index + 1] 577 for tok in next_uwline.tokens: 578 uwlines[index].AppendToken(tok) 579 if (len(next_uwline.tokens) == 1 and 580 next_uwline.first.is_multiline_string): 581 # This may be a multiline shebang. In that case, we want to retain the 582 # formatting. Otherwise, it could mess up the shell script's syntax. 583 uwlines[index].disable = True 584 yield uwlines[index] 585 index += 2 586 last_was_merged = True 587 else: 588 yield uwlines[index] 589 index += 1 590 last_was_merged = False 591 592 593def _NoBlankLinesBeforeCurrentToken(text, cur_token, prev_token): 594 """Determine if there are no blank lines before the current token. 595 596 The previous token is a docstring or comment. The prev_token_lineno is the 597 start of the text of that token. Counting the number of newlines in its text 598 gives us the extent and thus where the line number of the end of the 599 docstring or comment. After that, we just compare it to the current token's 600 line number to see if there are blank lines between them. 601 602 Arguments: 603 text: (unicode) The text of the docstring or comment before the current 604 token. 605 cur_token: (format_token.FormatToken) The current token in the unwrapped 606 line. 607 prev_token: (format_token.FormatToken) The previous token in the unwrapped 608 line. 609 610 Returns: 611 True if there is no blank line before the current token. 612 """ 613 cur_token_lineno = cur_token.lineno 614 if cur_token.is_comment: 615 cur_token_lineno -= cur_token.value.count('\n') 616 num_newlines = text.count('\n') if not prev_token.is_comment else 0 617 return prev_token.lineno + num_newlines == cur_token_lineno - 1 618