• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015-2017 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Decide what the format for the code should be.
15
16The `unwrapped_line.UnwrappedLine`s are now ready to be formatted.
17UnwrappedLines that can be merged together are. The best formatting is returned
18as a string.
19
20  Reformat(): the main function exported by this module.
21"""
22
23from __future__ import unicode_literals
24import collections
25import heapq
26import re
27
28from lib2to3 import pytree
29from lib2to3.pgen2 import token
30
31from yapf.yapflib import format_decision_state
32from yapf.yapflib import format_token
33from yapf.yapflib import line_joiner
34from yapf.yapflib import pytree_utils
35from yapf.yapflib import style
36from yapf.yapflib import verifier
37
38
39def Reformat(uwlines, verify=False):
40  """Reformat the unwrapped lines.
41
42  Arguments:
43    uwlines: (list of unwrapped_line.UnwrappedLine) Lines we want to format.
44    verify: (bool) True if reformatted code should be verified for syntax.
45
46  Returns:
47    A string representing the reformatted code.
48  """
49  final_lines = []
50  prev_uwline = None  # The previous line.
51  indent_width = style.Get('INDENT_WIDTH')
52
53  for uwline in _SingleOrMergedLines(uwlines):
54    first_token = uwline.first
55    _FormatFirstToken(first_token, uwline.depth, prev_uwline, final_lines)
56
57    indent_amt = indent_width * uwline.depth
58    state = format_decision_state.FormatDecisionState(uwline, indent_amt)
59    state.MoveStateToNextToken()
60
61    if not uwline.disable:
62      if uwline.first.is_comment:
63        uwline.first.node.value = uwline.first.node.value.rstrip()
64      elif uwline.last.is_comment:
65        uwline.last.node.value = uwline.last.node.value.rstrip()
66      if prev_uwline and prev_uwline.disable:
67        # Keep the vertical spacing between a disabled and enabled formatting
68        # region.
69        _RetainVerticalSpacingBetweenTokens(uwline.first, prev_uwline.last)
70      if any(tok.is_comment for tok in uwline.tokens):
71        _RetainVerticalSpacingBeforeComments(uwline)
72
73    if (_LineContainsI18n(uwline) or uwline.disable or
74        _LineHasContinuationMarkers(uwline)):
75      _RetainHorizontalSpacing(uwline)
76      _RetainVerticalSpacing(uwline, prev_uwline)
77      _EmitLineUnformatted(state)
78    elif _CanPlaceOnSingleLine(uwline) and not any(tok.must_split
79                                                   for tok in uwline.tokens):
80      # The unwrapped line fits on one line.
81      while state.next_token:
82        state.AddTokenToState(newline=False, dry_run=False)
83    else:
84      if not _AnalyzeSolutionSpace(state):
85        # Failsafe mode. If there isn't a solution to the line, then just emit
86        # it as is.
87        state = format_decision_state.FormatDecisionState(uwline, indent_amt)
88        state.MoveStateToNextToken()
89        _RetainHorizontalSpacing(uwline)
90        _RetainVerticalSpacing(uwline, prev_uwline)
91        _EmitLineUnformatted(state)
92
93    final_lines.append(uwline)
94    prev_uwline = uwline
95  return _FormatFinalLines(final_lines, verify)
96
97
98def _RetainHorizontalSpacing(uwline):
99  """Retain all horizontal spacing between tokens."""
100  for tok in uwline.tokens:
101    tok.RetainHorizontalSpacing(uwline.first.column, uwline.depth)
102
103
104def _RetainVerticalSpacing(cur_uwline, prev_uwline):
105  prev_tok = None
106  if prev_uwline is not None:
107    prev_tok = prev_uwline.last
108  for cur_tok in cur_uwline.tokens:
109    _RetainVerticalSpacingBetweenTokens(cur_tok, prev_tok)
110    prev_tok = cur_tok
111
112
113def _RetainVerticalSpacingBetweenTokens(cur_tok, prev_tok):
114  """Retain vertical spacing between two tokens."""
115  if prev_tok is None:
116    return
117
118  if prev_tok.is_string:
119    prev_lineno = prev_tok.lineno + prev_tok.value.count('\n')
120  elif prev_tok.is_pseudo_paren:
121    if not prev_tok.previous_token.is_multiline_string:
122      prev_lineno = prev_tok.previous_token.lineno
123    else:
124      prev_lineno = prev_tok.lineno
125  else:
126    prev_lineno = prev_tok.lineno
127
128  if cur_tok.is_comment:
129    cur_lineno = cur_tok.lineno - cur_tok.value.count('\n')
130  else:
131    cur_lineno = cur_tok.lineno
132
133  cur_tok.AdjustNewlinesBefore(cur_lineno - prev_lineno)
134
135
136def _RetainVerticalSpacingBeforeComments(uwline):
137  """Retain vertical spacing before comments."""
138  prev_token = None
139  for tok in uwline.tokens:
140    if tok.is_comment and prev_token:
141      if tok.lineno - tok.value.count('\n') - prev_token.lineno > 1:
142        tok.AdjustNewlinesBefore(ONE_BLANK_LINE)
143
144    prev_token = tok
145
146
147def _EmitLineUnformatted(state):
148  """Emit the line without formatting.
149
150  The line contains code that if reformatted would break a non-syntactic
151  convention. E.g., i18n comments and function calls are tightly bound by
152  convention. Instead, we calculate when / if a newline should occur and honor
153  that. But otherwise the code emitted will be the same as the original code.
154
155  Arguments:
156    state: (format_decision_state.FormatDecisionState) The format decision
157      state.
158  """
159  prev_lineno = None
160  while state.next_token:
161    previous_token = state.next_token.previous_token
162    previous_lineno = previous_token.lineno
163
164    if previous_token.is_multiline_string:
165      previous_lineno += previous_token.value.count('\n')
166
167    if previous_token.is_continuation:
168      newline = False
169    else:
170      newline = (prev_lineno is not None and
171                 state.next_token.lineno > previous_lineno)
172
173    prev_lineno = state.next_token.lineno
174    state.AddTokenToState(newline=newline, dry_run=False)
175
176
177def _LineContainsI18n(uwline):
178  """Return true if there are i18n comments or function calls in the line.
179
180  I18n comments and pseudo-function calls are closely related. They cannot
181  be moved apart without breaking i18n.
182
183  Arguments:
184    uwline: (unwrapped_line.UnwrappedLine) The line currently being formatted.
185
186  Returns:
187    True if the line contains i18n comments or function calls. False otherwise.
188  """
189  if style.Get('I18N_COMMENT'):
190    for tok in uwline.tokens:
191      if tok.is_comment and re.match(style.Get('I18N_COMMENT'), tok.value):
192        # Contains an i18n comment.
193        return True
194
195  if style.Get('I18N_FUNCTION_CALL'):
196    length = len(uwline.tokens)
197    index = 0
198    while index < length - 1:
199      if (uwline.tokens[index + 1].value == '(' and
200          uwline.tokens[index].value in style.Get('I18N_FUNCTION_CALL')):
201        return True
202      index += 1
203
204  return False
205
206
207def _LineHasContinuationMarkers(uwline):
208  """Return true if the line has continuation markers in it."""
209  return any(tok.is_continuation for tok in uwline.tokens)
210
211
212def _CanPlaceOnSingleLine(uwline):
213  """Determine if the unwrapped line can go on a single line.
214
215  Arguments:
216    uwline: (unwrapped_line.UnwrappedLine) The line currently being formatted.
217
218  Returns:
219    True if the line can or should be added to a single line. False otherwise.
220  """
221  indent_amt = style.Get('INDENT_WIDTH') * uwline.depth
222  last = uwline.last
223  last_index = -1
224  if last.is_pylint_comment:
225    last = last.previous_token
226    last_index = -2
227  if last is None:
228    return True
229  return (last.total_length + indent_amt <= style.Get('COLUMN_LIMIT') and
230          not any(tok.is_comment for tok in uwline.tokens[:last_index]))
231
232
233def _FormatFinalLines(final_lines, verify):
234  """Compose the final output from the finalized lines."""
235  formatted_code = []
236  for line in final_lines:
237    formatted_line = []
238    for tok in line.tokens:
239      if not tok.is_pseudo_paren:
240        formatted_line.append(tok.whitespace_prefix)
241        formatted_line.append(tok.value)
242      else:
243        if (not tok.next_token.whitespace_prefix.startswith('\n') and
244            not tok.next_token.whitespace_prefix.startswith(' ')):
245          if (tok.previous_token.value == ':' or
246              tok.next_token.value not in ',}])'):
247            formatted_line.append(' ')
248
249    formatted_code.append(''.join(formatted_line))
250    if verify:
251      verifier.VerifyCode(formatted_code[-1])
252
253  return ''.join(formatted_code) + '\n'
254
255
256class _StateNode(object):
257  """An edge in the solution space from 'previous.state' to 'state'.
258
259  Attributes:
260    state: (format_decision_state.FormatDecisionState) The format decision state
261      for this node.
262    newline: If True, then on the edge from 'previous.state' to 'state' a
263      newline is inserted.
264    previous: (_StateNode) The previous state node in the graph.
265  """
266
267  # TODO(morbo): Add a '__cmp__' method.
268
269  def __init__(self, state, newline, previous):
270    self.state = state.Clone()
271    self.newline = newline
272    self.previous = previous
273
274  def __repr__(self):  # pragma: no cover
275    return 'StateNode(state=[\n{0}\n], newline={1})'.format(
276        self.state, self.newline)
277
278
279# A tuple of (penalty, count) that is used to prioritize the BFS. In case of
280# equal penalties, we prefer states that were inserted first. During state
281# generation, we make sure that we insert states first that break the line as
282# late as possible.
283_OrderedPenalty = collections.namedtuple('OrderedPenalty', ['penalty', 'count'])
284
285# An item in the prioritized BFS search queue. The 'StateNode's 'state' has
286# the given '_OrderedPenalty'.
287_QueueItem = collections.namedtuple('QueueItem',
288                                    ['ordered_penalty', 'state_node'])
289
290
291def _AnalyzeSolutionSpace(initial_state):
292  """Analyze the entire solution space starting from initial_state.
293
294  This implements a variant of Dijkstra's algorithm on the graph that spans
295  the solution space (LineStates are the nodes). The algorithm tries to find
296  the shortest path (the one with the lowest penalty) from 'initial_state' to
297  the state where all tokens are placed.
298
299  Arguments:
300    initial_state: (format_decision_state.FormatDecisionState) The initial state
301      to start the search from.
302
303  Returns:
304    True if a formatting solution was found. False otherwise.
305  """
306  count = 0
307  seen = set()
308  p_queue = []
309
310  # Insert start element.
311  node = _StateNode(initial_state, False, None)
312  heapq.heappush(p_queue, _QueueItem(_OrderedPenalty(0, count), node))
313
314  count += 1
315  while p_queue:
316    item = p_queue[0]
317    penalty = item.ordered_penalty.penalty
318    node = item.state_node
319    if not node.state.next_token:
320      break
321    heapq.heappop(p_queue)
322
323    if count > 10000:
324      node.state.ignore_stack_for_comparison = True
325
326    if node.state in seen:
327      continue
328
329    seen.add(node.state)
330
331    # FIXME(morbo): Add a 'decision' element?
332
333    count = _AddNextStateToQueue(penalty, node, False, count, p_queue)
334    count = _AddNextStateToQueue(penalty, node, True, count, p_queue)
335
336  if not p_queue:
337    # We weren't able to find a solution. Do nothing.
338    return False
339
340  _ReconstructPath(initial_state, heapq.heappop(p_queue).state_node)
341  return True
342
343
344def _AddNextStateToQueue(penalty, previous_node, newline, count, p_queue):
345  """Add the following state to the analysis queue.
346
347  Assume the current state is 'previous_node' and has been reached with a
348  penalty of 'penalty'. Insert a line break if 'newline' is True.
349
350  Arguments:
351    penalty: (int) The penalty associated with the path up to this point.
352    previous_node: (_StateNode) The last _StateNode inserted into the priority
353      queue.
354    newline: (bool) Add a newline if True.
355    count: (int) The number of elements in the queue.
356    p_queue: (heapq) The priority queue representing the solution space.
357
358  Returns:
359    The updated number of elements in the queue.
360  """
361  must_split = previous_node.state.MustSplit()
362  if newline and not previous_node.state.CanSplit(must_split):
363    # Don't add a newline if the token cannot be split.
364    return count
365  if not newline and must_split:
366    # Don't add a token we must split but where we aren't splitting.
367    return count
368
369  node = _StateNode(previous_node.state, newline, previous_node)
370  penalty += node.state.AddTokenToState(
371      newline=newline, dry_run=True, must_split=must_split)
372  heapq.heappush(p_queue, _QueueItem(_OrderedPenalty(penalty, count), node))
373  return count + 1
374
375
376def _ReconstructPath(initial_state, current):
377  """Reconstruct the path through the queue with lowest penalty.
378
379  Arguments:
380    initial_state: (format_decision_state.FormatDecisionState) The initial state
381      to start the search from.
382    current: (_StateNode) The node in the decision graph that is the end point
383      of the path with the least penalty.
384  """
385  path = collections.deque()
386
387  while current.previous:
388    path.appendleft(current)
389    current = current.previous
390
391  for node in path:
392    initial_state.AddTokenToState(newline=node.newline, dry_run=False)
393
394
395def _FormatFirstToken(first_token, indent_depth, prev_uwline, final_lines):
396  """Format the first token in the unwrapped line.
397
398  Add a newline and the required indent before the first token of the unwrapped
399  line.
400
401  Arguments:
402    first_token: (format_token.FormatToken) The first token in the unwrapped
403      line.
404    indent_depth: (int) The line's indentation depth.
405    prev_uwline: (list of unwrapped_line.UnwrappedLine) The unwrapped line
406      previous to this line.
407    final_lines: (list of unwrapped_line.UnwrappedLine) The unwrapped lines
408      that have already been processed.
409  """
410  first_token.AddWhitespacePrefix(
411      _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline,
412                                 final_lines),
413      indent_level=indent_depth)
414
415
416NO_BLANK_LINES = 1
417ONE_BLANK_LINE = 2
418TWO_BLANK_LINES = 3
419
420
421def _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline,
422                               final_lines):
423  """Calculate the number of newlines we need to add.
424
425  Arguments:
426    first_token: (format_token.FormatToken) The first token in the unwrapped
427      line.
428    indent_depth: (int) The line's indentation depth.
429    prev_uwline: (list of unwrapped_line.UnwrappedLine) The unwrapped line
430      previous to this line.
431    final_lines: (list of unwrapped_line.UnwrappedLine) The unwrapped lines
432      that have already been processed.
433
434  Returns:
435    The number of newlines needed before the first token.
436  """
437  # TODO(morbo): Special handling for imports.
438  # TODO(morbo): Create a knob that can tune these.
439  if prev_uwline is None:
440    # The first line in the file. Don't add blank lines.
441    # FIXME(morbo): Is this correct?
442    if first_token.newlines is not None:
443      pytree_utils.SetNodeAnnotation(first_token.node,
444                                     pytree_utils.Annotation.NEWLINES, None)
445    return 0
446
447  if first_token.is_docstring:
448    if (prev_uwline.first.value == 'class' and
449        style.Get('BLANK_LINE_BEFORE_CLASS_DOCSTRING')):
450      # Enforce a blank line before a class's docstring.
451      return ONE_BLANK_LINE
452    # The docstring shouldn't have a newline before it.
453    return NO_BLANK_LINES
454
455  prev_last_token = prev_uwline.last
456  if prev_last_token.is_docstring:
457    if (not indent_depth and first_token.value in {'class', 'def', 'async'}):
458      # Separate a class or function from the module-level docstring with two
459      # blank lines.
460      return TWO_BLANK_LINES
461    if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token,
462                                       prev_last_token):
463      return NO_BLANK_LINES
464    else:
465      return ONE_BLANK_LINE
466
467  if first_token.value in {'class', 'def', 'async', '@'}:
468    # TODO(morbo): This can go once the blank line calculator is more
469    # sophisticated.
470    if not indent_depth:
471      # This is a top-level class or function.
472      is_inline_comment = prev_last_token.whitespace_prefix.count('\n') == 0
473      if (not prev_uwline.disable and prev_last_token.is_comment and
474          not is_inline_comment):
475        # This token follows a non-inline comment.
476        if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token,
477                                           prev_last_token):
478          # Assume that the comment is "attached" to the current line.
479          # Therefore, we want two blank lines before the comment.
480          index = len(final_lines) - 1
481          while index > 0:
482            if not final_lines[index - 1].is_comment:
483              break
484            index -= 1
485          if final_lines[index - 1].first.value == '@':
486            final_lines[index].first.AdjustNewlinesBefore(NO_BLANK_LINES)
487          else:
488            prev_last_token.AdjustNewlinesBefore(TWO_BLANK_LINES)
489          if first_token.newlines is not None:
490            pytree_utils.SetNodeAnnotation(
491                first_token.node, pytree_utils.Annotation.NEWLINES, None)
492          return NO_BLANK_LINES
493    elif prev_uwline.first.value in {'class', 'def', 'async'}:
494      if not style.Get('BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF'):
495        pytree_utils.SetNodeAnnotation(first_token.node,
496                                       pytree_utils.Annotation.NEWLINES, None)
497        return NO_BLANK_LINES
498
499  # Calculate how many newlines were between the original lines. We want to
500  # retain that formatting if it doesn't violate one of the style guide rules.
501  if first_token.is_comment:
502    first_token_lineno = first_token.lineno - first_token.value.count('\n')
503  else:
504    first_token_lineno = first_token.lineno
505
506  prev_last_token_lineno = prev_last_token.lineno
507  if prev_last_token.is_multiline_string:
508    prev_last_token_lineno += prev_last_token.value.count('\n')
509
510  if first_token_lineno - prev_last_token_lineno > 1:
511    return ONE_BLANK_LINE
512
513  return NO_BLANK_LINES
514
515
516def _SingleOrMergedLines(uwlines):
517  """Generate the lines we want to format.
518
519  Arguments:
520    uwlines: (list of unwrapped_line.UnwrappedLine) Lines we want to format.
521
522  Yields:
523    Either a single line, if the current line cannot be merged with the
524    succeeding line, or the next two lines merged into one line.
525  """
526  index = 0
527  last_was_merged = False
528  while index < len(uwlines):
529    if uwlines[index].disable:
530      uwline = uwlines[index]
531      index += 1
532      while index < len(uwlines):
533        column = uwline.last.column + 2
534        if uwlines[index].lineno != uwline.lineno:
535          break
536        if uwline.last.value != ':':
537          leaf = pytree.Leaf(
538              type=token.SEMI, value=';', context=('', (uwline.lineno, column)))
539          uwline.AppendToken(format_token.FormatToken(leaf))
540        for tok in uwlines[index].tokens:
541          uwline.AppendToken(tok)
542        index += 1
543      yield uwline
544    elif line_joiner.CanMergeMultipleLines(uwlines[index:], last_was_merged):
545      # TODO(morbo): This splice is potentially very slow. Come up with a more
546      # performance-friendly way of determining if two lines can be merged.
547      next_uwline = uwlines[index + 1]
548      for tok in next_uwline.tokens:
549        uwlines[index].AppendToken(tok)
550      if (len(next_uwline.tokens) == 1 and
551          next_uwline.first.is_multiline_string):
552        # This may be a multiline shebang. In that case, we want to retain the
553        # formatting. Otherwise, it could mess up the shell script's syntax.
554        uwlines[index].disable = True
555      yield uwlines[index]
556      index += 2
557      last_was_merged = True
558    else:
559      yield uwlines[index]
560      index += 1
561      last_was_merged = False
562
563
564def _NoBlankLinesBeforeCurrentToken(text, cur_token, prev_token):
565  """Determine if there are no blank lines before the current token.
566
567  The previous token is a docstring or comment. The prev_token_lineno is the
568  start of the text of that token. Counting the number of newlines in its text
569  gives us the extent and thus where the line number of the end of the
570  docstring or comment. After that, we just compare it to the current token's
571  line number to see if there are blank lines between them.
572
573  Arguments:
574    text: (unicode) The text of the docstring or comment before the current
575      token.
576    cur_token: (format_token.FormatToken) The current token in the unwrapped
577      line.
578    prev_token: (format_token.FormatToken) The previous token in the unwrapped
579      line.
580
581  Returns:
582    True if there is no blank line before the current token.
583  """
584  cur_token_lineno = cur_token.lineno
585  if cur_token.is_comment:
586    cur_token_lineno -= cur_token.value.count('\n')
587  num_newlines = text.count('\n') if not prev_token.is_comment else 0
588  return prev_token.lineno + num_newlines == cur_token_lineno - 1
589