1# Copyright 2015-2017 Google Inc. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14"""Decide what the format for the code should be. 15 16The `unwrapped_line.UnwrappedLine`s are now ready to be formatted. 17UnwrappedLines that can be merged together are. The best formatting is returned 18as a string. 19 20 Reformat(): the main function exported by this module. 21""" 22 23from __future__ import unicode_literals 24import collections 25import heapq 26import re 27 28from lib2to3 import pytree 29from lib2to3.pgen2 import token 30 31from yapf.yapflib import format_decision_state 32from yapf.yapflib import format_token 33from yapf.yapflib import line_joiner 34from yapf.yapflib import pytree_utils 35from yapf.yapflib import style 36from yapf.yapflib import verifier 37 38 39def Reformat(uwlines, verify=False): 40 """Reformat the unwrapped lines. 41 42 Arguments: 43 uwlines: (list of unwrapped_line.UnwrappedLine) Lines we want to format. 44 verify: (bool) True if reformatted code should be verified for syntax. 45 46 Returns: 47 A string representing the reformatted code. 48 """ 49 final_lines = [] 50 prev_uwline = None # The previous line. 51 indent_width = style.Get('INDENT_WIDTH') 52 53 for uwline in _SingleOrMergedLines(uwlines): 54 first_token = uwline.first 55 _FormatFirstToken(first_token, uwline.depth, prev_uwline, final_lines) 56 57 indent_amt = indent_width * uwline.depth 58 state = format_decision_state.FormatDecisionState(uwline, indent_amt) 59 state.MoveStateToNextToken() 60 61 if not uwline.disable: 62 if uwline.first.is_comment: 63 uwline.first.node.value = uwline.first.node.value.rstrip() 64 elif uwline.last.is_comment: 65 uwline.last.node.value = uwline.last.node.value.rstrip() 66 if prev_uwline and prev_uwline.disable: 67 # Keep the vertical spacing between a disabled and enabled formatting 68 # region. 69 _RetainVerticalSpacingBetweenTokens(uwline.first, prev_uwline.last) 70 if any(tok.is_comment for tok in uwline.tokens): 71 _RetainVerticalSpacingBeforeComments(uwline) 72 73 if (_LineContainsI18n(uwline) or uwline.disable or 74 _LineHasContinuationMarkers(uwline)): 75 _RetainHorizontalSpacing(uwline) 76 _RetainVerticalSpacing(uwline, prev_uwline) 77 _EmitLineUnformatted(state) 78 elif _CanPlaceOnSingleLine(uwline) and not any(tok.must_split 79 for tok in uwline.tokens): 80 # The unwrapped line fits on one line. 81 while state.next_token: 82 state.AddTokenToState(newline=False, dry_run=False) 83 else: 84 if not _AnalyzeSolutionSpace(state): 85 # Failsafe mode. If there isn't a solution to the line, then just emit 86 # it as is. 87 state = format_decision_state.FormatDecisionState(uwline, indent_amt) 88 state.MoveStateToNextToken() 89 _RetainHorizontalSpacing(uwline) 90 _RetainVerticalSpacing(uwline, prev_uwline) 91 _EmitLineUnformatted(state) 92 93 final_lines.append(uwline) 94 prev_uwline = uwline 95 return _FormatFinalLines(final_lines, verify) 96 97 98def _RetainHorizontalSpacing(uwline): 99 """Retain all horizontal spacing between tokens.""" 100 for tok in uwline.tokens: 101 tok.RetainHorizontalSpacing(uwline.first.column, uwline.depth) 102 103 104def _RetainVerticalSpacing(cur_uwline, prev_uwline): 105 prev_tok = None 106 if prev_uwline is not None: 107 prev_tok = prev_uwline.last 108 for cur_tok in cur_uwline.tokens: 109 _RetainVerticalSpacingBetweenTokens(cur_tok, prev_tok) 110 prev_tok = cur_tok 111 112 113def _RetainVerticalSpacingBetweenTokens(cur_tok, prev_tok): 114 """Retain vertical spacing between two tokens.""" 115 if prev_tok is None: 116 return 117 118 if prev_tok.is_string: 119 prev_lineno = prev_tok.lineno + prev_tok.value.count('\n') 120 elif prev_tok.is_pseudo_paren: 121 if not prev_tok.previous_token.is_multiline_string: 122 prev_lineno = prev_tok.previous_token.lineno 123 else: 124 prev_lineno = prev_tok.lineno 125 else: 126 prev_lineno = prev_tok.lineno 127 128 if cur_tok.is_comment: 129 cur_lineno = cur_tok.lineno - cur_tok.value.count('\n') 130 else: 131 cur_lineno = cur_tok.lineno 132 133 cur_tok.AdjustNewlinesBefore(cur_lineno - prev_lineno) 134 135 136def _RetainVerticalSpacingBeforeComments(uwline): 137 """Retain vertical spacing before comments.""" 138 prev_token = None 139 for tok in uwline.tokens: 140 if tok.is_comment and prev_token: 141 if tok.lineno - tok.value.count('\n') - prev_token.lineno > 1: 142 tok.AdjustNewlinesBefore(ONE_BLANK_LINE) 143 144 prev_token = tok 145 146 147def _EmitLineUnformatted(state): 148 """Emit the line without formatting. 149 150 The line contains code that if reformatted would break a non-syntactic 151 convention. E.g., i18n comments and function calls are tightly bound by 152 convention. Instead, we calculate when / if a newline should occur and honor 153 that. But otherwise the code emitted will be the same as the original code. 154 155 Arguments: 156 state: (format_decision_state.FormatDecisionState) The format decision 157 state. 158 """ 159 prev_lineno = None 160 while state.next_token: 161 previous_token = state.next_token.previous_token 162 previous_lineno = previous_token.lineno 163 164 if previous_token.is_multiline_string: 165 previous_lineno += previous_token.value.count('\n') 166 167 if previous_token.is_continuation: 168 newline = False 169 else: 170 newline = (prev_lineno is not None and 171 state.next_token.lineno > previous_lineno) 172 173 prev_lineno = state.next_token.lineno 174 state.AddTokenToState(newline=newline, dry_run=False) 175 176 177def _LineContainsI18n(uwline): 178 """Return true if there are i18n comments or function calls in the line. 179 180 I18n comments and pseudo-function calls are closely related. They cannot 181 be moved apart without breaking i18n. 182 183 Arguments: 184 uwline: (unwrapped_line.UnwrappedLine) The line currently being formatted. 185 186 Returns: 187 True if the line contains i18n comments or function calls. False otherwise. 188 """ 189 if style.Get('I18N_COMMENT'): 190 for tok in uwline.tokens: 191 if tok.is_comment and re.match(style.Get('I18N_COMMENT'), tok.value): 192 # Contains an i18n comment. 193 return True 194 195 if style.Get('I18N_FUNCTION_CALL'): 196 length = len(uwline.tokens) 197 index = 0 198 while index < length - 1: 199 if (uwline.tokens[index + 1].value == '(' and 200 uwline.tokens[index].value in style.Get('I18N_FUNCTION_CALL')): 201 return True 202 index += 1 203 204 return False 205 206 207def _LineHasContinuationMarkers(uwline): 208 """Return true if the line has continuation markers in it.""" 209 return any(tok.is_continuation for tok in uwline.tokens) 210 211 212def _CanPlaceOnSingleLine(uwline): 213 """Determine if the unwrapped line can go on a single line. 214 215 Arguments: 216 uwline: (unwrapped_line.UnwrappedLine) The line currently being formatted. 217 218 Returns: 219 True if the line can or should be added to a single line. False otherwise. 220 """ 221 indent_amt = style.Get('INDENT_WIDTH') * uwline.depth 222 last = uwline.last 223 last_index = -1 224 if last.is_pylint_comment: 225 last = last.previous_token 226 last_index = -2 227 if last is None: 228 return True 229 return (last.total_length + indent_amt <= style.Get('COLUMN_LIMIT') and 230 not any(tok.is_comment for tok in uwline.tokens[:last_index])) 231 232 233def _FormatFinalLines(final_lines, verify): 234 """Compose the final output from the finalized lines.""" 235 formatted_code = [] 236 for line in final_lines: 237 formatted_line = [] 238 for tok in line.tokens: 239 if not tok.is_pseudo_paren: 240 formatted_line.append(tok.whitespace_prefix) 241 formatted_line.append(tok.value) 242 else: 243 if (not tok.next_token.whitespace_prefix.startswith('\n') and 244 not tok.next_token.whitespace_prefix.startswith(' ')): 245 if (tok.previous_token.value == ':' or 246 tok.next_token.value not in ',}])'): 247 formatted_line.append(' ') 248 249 formatted_code.append(''.join(formatted_line)) 250 if verify: 251 verifier.VerifyCode(formatted_code[-1]) 252 253 return ''.join(formatted_code) + '\n' 254 255 256class _StateNode(object): 257 """An edge in the solution space from 'previous.state' to 'state'. 258 259 Attributes: 260 state: (format_decision_state.FormatDecisionState) The format decision state 261 for this node. 262 newline: If True, then on the edge from 'previous.state' to 'state' a 263 newline is inserted. 264 previous: (_StateNode) The previous state node in the graph. 265 """ 266 267 # TODO(morbo): Add a '__cmp__' method. 268 269 def __init__(self, state, newline, previous): 270 self.state = state.Clone() 271 self.newline = newline 272 self.previous = previous 273 274 def __repr__(self): # pragma: no cover 275 return 'StateNode(state=[\n{0}\n], newline={1})'.format( 276 self.state, self.newline) 277 278 279# A tuple of (penalty, count) that is used to prioritize the BFS. In case of 280# equal penalties, we prefer states that were inserted first. During state 281# generation, we make sure that we insert states first that break the line as 282# late as possible. 283_OrderedPenalty = collections.namedtuple('OrderedPenalty', ['penalty', 'count']) 284 285# An item in the prioritized BFS search queue. The 'StateNode's 'state' has 286# the given '_OrderedPenalty'. 287_QueueItem = collections.namedtuple('QueueItem', 288 ['ordered_penalty', 'state_node']) 289 290 291def _AnalyzeSolutionSpace(initial_state): 292 """Analyze the entire solution space starting from initial_state. 293 294 This implements a variant of Dijkstra's algorithm on the graph that spans 295 the solution space (LineStates are the nodes). The algorithm tries to find 296 the shortest path (the one with the lowest penalty) from 'initial_state' to 297 the state where all tokens are placed. 298 299 Arguments: 300 initial_state: (format_decision_state.FormatDecisionState) The initial state 301 to start the search from. 302 303 Returns: 304 True if a formatting solution was found. False otherwise. 305 """ 306 count = 0 307 seen = set() 308 p_queue = [] 309 310 # Insert start element. 311 node = _StateNode(initial_state, False, None) 312 heapq.heappush(p_queue, _QueueItem(_OrderedPenalty(0, count), node)) 313 314 count += 1 315 while p_queue: 316 item = p_queue[0] 317 penalty = item.ordered_penalty.penalty 318 node = item.state_node 319 if not node.state.next_token: 320 break 321 heapq.heappop(p_queue) 322 323 if count > 10000: 324 node.state.ignore_stack_for_comparison = True 325 326 if node.state in seen: 327 continue 328 329 seen.add(node.state) 330 331 # FIXME(morbo): Add a 'decision' element? 332 333 count = _AddNextStateToQueue(penalty, node, False, count, p_queue) 334 count = _AddNextStateToQueue(penalty, node, True, count, p_queue) 335 336 if not p_queue: 337 # We weren't able to find a solution. Do nothing. 338 return False 339 340 _ReconstructPath(initial_state, heapq.heappop(p_queue).state_node) 341 return True 342 343 344def _AddNextStateToQueue(penalty, previous_node, newline, count, p_queue): 345 """Add the following state to the analysis queue. 346 347 Assume the current state is 'previous_node' and has been reached with a 348 penalty of 'penalty'. Insert a line break if 'newline' is True. 349 350 Arguments: 351 penalty: (int) The penalty associated with the path up to this point. 352 previous_node: (_StateNode) The last _StateNode inserted into the priority 353 queue. 354 newline: (bool) Add a newline if True. 355 count: (int) The number of elements in the queue. 356 p_queue: (heapq) The priority queue representing the solution space. 357 358 Returns: 359 The updated number of elements in the queue. 360 """ 361 must_split = previous_node.state.MustSplit() 362 if newline and not previous_node.state.CanSplit(must_split): 363 # Don't add a newline if the token cannot be split. 364 return count 365 if not newline and must_split: 366 # Don't add a token we must split but where we aren't splitting. 367 return count 368 369 node = _StateNode(previous_node.state, newline, previous_node) 370 penalty += node.state.AddTokenToState( 371 newline=newline, dry_run=True, must_split=must_split) 372 heapq.heappush(p_queue, _QueueItem(_OrderedPenalty(penalty, count), node)) 373 return count + 1 374 375 376def _ReconstructPath(initial_state, current): 377 """Reconstruct the path through the queue with lowest penalty. 378 379 Arguments: 380 initial_state: (format_decision_state.FormatDecisionState) The initial state 381 to start the search from. 382 current: (_StateNode) The node in the decision graph that is the end point 383 of the path with the least penalty. 384 """ 385 path = collections.deque() 386 387 while current.previous: 388 path.appendleft(current) 389 current = current.previous 390 391 for node in path: 392 initial_state.AddTokenToState(newline=node.newline, dry_run=False) 393 394 395def _FormatFirstToken(first_token, indent_depth, prev_uwline, final_lines): 396 """Format the first token in the unwrapped line. 397 398 Add a newline and the required indent before the first token of the unwrapped 399 line. 400 401 Arguments: 402 first_token: (format_token.FormatToken) The first token in the unwrapped 403 line. 404 indent_depth: (int) The line's indentation depth. 405 prev_uwline: (list of unwrapped_line.UnwrappedLine) The unwrapped line 406 previous to this line. 407 final_lines: (list of unwrapped_line.UnwrappedLine) The unwrapped lines 408 that have already been processed. 409 """ 410 first_token.AddWhitespacePrefix( 411 _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline, 412 final_lines), 413 indent_level=indent_depth) 414 415 416NO_BLANK_LINES = 1 417ONE_BLANK_LINE = 2 418TWO_BLANK_LINES = 3 419 420 421def _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline, 422 final_lines): 423 """Calculate the number of newlines we need to add. 424 425 Arguments: 426 first_token: (format_token.FormatToken) The first token in the unwrapped 427 line. 428 indent_depth: (int) The line's indentation depth. 429 prev_uwline: (list of unwrapped_line.UnwrappedLine) The unwrapped line 430 previous to this line. 431 final_lines: (list of unwrapped_line.UnwrappedLine) The unwrapped lines 432 that have already been processed. 433 434 Returns: 435 The number of newlines needed before the first token. 436 """ 437 # TODO(morbo): Special handling for imports. 438 # TODO(morbo): Create a knob that can tune these. 439 if prev_uwline is None: 440 # The first line in the file. Don't add blank lines. 441 # FIXME(morbo): Is this correct? 442 if first_token.newlines is not None: 443 pytree_utils.SetNodeAnnotation(first_token.node, 444 pytree_utils.Annotation.NEWLINES, None) 445 return 0 446 447 if first_token.is_docstring: 448 if (prev_uwline.first.value == 'class' and 449 style.Get('BLANK_LINE_BEFORE_CLASS_DOCSTRING')): 450 # Enforce a blank line before a class's docstring. 451 return ONE_BLANK_LINE 452 # The docstring shouldn't have a newline before it. 453 return NO_BLANK_LINES 454 455 prev_last_token = prev_uwline.last 456 if prev_last_token.is_docstring: 457 if (not indent_depth and first_token.value in {'class', 'def', 'async'}): 458 # Separate a class or function from the module-level docstring with two 459 # blank lines. 460 return TWO_BLANK_LINES 461 if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token, 462 prev_last_token): 463 return NO_BLANK_LINES 464 else: 465 return ONE_BLANK_LINE 466 467 if first_token.value in {'class', 'def', 'async', '@'}: 468 # TODO(morbo): This can go once the blank line calculator is more 469 # sophisticated. 470 if not indent_depth: 471 # This is a top-level class or function. 472 is_inline_comment = prev_last_token.whitespace_prefix.count('\n') == 0 473 if (not prev_uwline.disable and prev_last_token.is_comment and 474 not is_inline_comment): 475 # This token follows a non-inline comment. 476 if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token, 477 prev_last_token): 478 # Assume that the comment is "attached" to the current line. 479 # Therefore, we want two blank lines before the comment. 480 index = len(final_lines) - 1 481 while index > 0: 482 if not final_lines[index - 1].is_comment: 483 break 484 index -= 1 485 if final_lines[index - 1].first.value == '@': 486 final_lines[index].first.AdjustNewlinesBefore(NO_BLANK_LINES) 487 else: 488 prev_last_token.AdjustNewlinesBefore(TWO_BLANK_LINES) 489 if first_token.newlines is not None: 490 pytree_utils.SetNodeAnnotation( 491 first_token.node, pytree_utils.Annotation.NEWLINES, None) 492 return NO_BLANK_LINES 493 elif prev_uwline.first.value in {'class', 'def', 'async'}: 494 if not style.Get('BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF'): 495 pytree_utils.SetNodeAnnotation(first_token.node, 496 pytree_utils.Annotation.NEWLINES, None) 497 return NO_BLANK_LINES 498 499 # Calculate how many newlines were between the original lines. We want to 500 # retain that formatting if it doesn't violate one of the style guide rules. 501 if first_token.is_comment: 502 first_token_lineno = first_token.lineno - first_token.value.count('\n') 503 else: 504 first_token_lineno = first_token.lineno 505 506 prev_last_token_lineno = prev_last_token.lineno 507 if prev_last_token.is_multiline_string: 508 prev_last_token_lineno += prev_last_token.value.count('\n') 509 510 if first_token_lineno - prev_last_token_lineno > 1: 511 return ONE_BLANK_LINE 512 513 return NO_BLANK_LINES 514 515 516def _SingleOrMergedLines(uwlines): 517 """Generate the lines we want to format. 518 519 Arguments: 520 uwlines: (list of unwrapped_line.UnwrappedLine) Lines we want to format. 521 522 Yields: 523 Either a single line, if the current line cannot be merged with the 524 succeeding line, or the next two lines merged into one line. 525 """ 526 index = 0 527 last_was_merged = False 528 while index < len(uwlines): 529 if uwlines[index].disable: 530 uwline = uwlines[index] 531 index += 1 532 while index < len(uwlines): 533 column = uwline.last.column + 2 534 if uwlines[index].lineno != uwline.lineno: 535 break 536 if uwline.last.value != ':': 537 leaf = pytree.Leaf( 538 type=token.SEMI, value=';', context=('', (uwline.lineno, column))) 539 uwline.AppendToken(format_token.FormatToken(leaf)) 540 for tok in uwlines[index].tokens: 541 uwline.AppendToken(tok) 542 index += 1 543 yield uwline 544 elif line_joiner.CanMergeMultipleLines(uwlines[index:], last_was_merged): 545 # TODO(morbo): This splice is potentially very slow. Come up with a more 546 # performance-friendly way of determining if two lines can be merged. 547 next_uwline = uwlines[index + 1] 548 for tok in next_uwline.tokens: 549 uwlines[index].AppendToken(tok) 550 if (len(next_uwline.tokens) == 1 and 551 next_uwline.first.is_multiline_string): 552 # This may be a multiline shebang. In that case, we want to retain the 553 # formatting. Otherwise, it could mess up the shell script's syntax. 554 uwlines[index].disable = True 555 yield uwlines[index] 556 index += 2 557 last_was_merged = True 558 else: 559 yield uwlines[index] 560 index += 1 561 last_was_merged = False 562 563 564def _NoBlankLinesBeforeCurrentToken(text, cur_token, prev_token): 565 """Determine if there are no blank lines before the current token. 566 567 The previous token is a docstring or comment. The prev_token_lineno is the 568 start of the text of that token. Counting the number of newlines in its text 569 gives us the extent and thus where the line number of the end of the 570 docstring or comment. After that, we just compare it to the current token's 571 line number to see if there are blank lines between them. 572 573 Arguments: 574 text: (unicode) The text of the docstring or comment before the current 575 token. 576 cur_token: (format_token.FormatToken) The current token in the unwrapped 577 line. 578 prev_token: (format_token.FormatToken) The previous token in the unwrapped 579 line. 580 581 Returns: 582 True if there is no blank line before the current token. 583 """ 584 cur_token_lineno = cur_token.lineno 585 if cur_token.is_comment: 586 cur_token_lineno -= cur_token.value.count('\n') 587 num_newlines = text.count('\n') if not prev_token.is_comment else 0 588 return prev_token.lineno + num_newlines == cur_token_lineno - 1 589