• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2015 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Decide what the format for the code should be.
15
16The `unwrapped_line.UnwrappedLine`s are now ready to be formatted.
17UnwrappedLines that can be merged together are. The best formatting is returned
18as a string.
19
20  Reformat(): the main function exported by this module.
21"""
22
23from __future__ import unicode_literals
24import collections
25import heapq
26import re
27
28from lib2to3 import pytree
29from lib2to3.pgen2 import token
30
31from yapf.yapflib import format_decision_state
32from yapf.yapflib import format_token
33from yapf.yapflib import line_joiner
34from yapf.yapflib import pytree_utils
35from yapf.yapflib import style
36from yapf.yapflib import verifier
37
38
39def Reformat(uwlines, verify=False, lines=None):
40  """Reformat the unwrapped lines.
41
42  Arguments:
43    uwlines: (list of unwrapped_line.UnwrappedLine) Lines we want to format.
44    verify: (bool) True if reformatted code should be verified for syntax.
45    lines: (set of int) The lines which can be modified or None if there is no
46      line range restriction.
47
48  Returns:
49    A string representing the reformatted code.
50  """
51  final_lines = []
52  prev_uwline = None  # The previous line.
53  indent_width = style.Get('INDENT_WIDTH')
54
55  for uwline in _SingleOrMergedLines(uwlines):
56    first_token = uwline.first
57    _FormatFirstToken(first_token, uwline.depth, prev_uwline, final_lines)
58
59    indent_amt = indent_width * uwline.depth
60    state = format_decision_state.FormatDecisionState(uwline, indent_amt)
61    state.MoveStateToNextToken()
62
63    if not uwline.disable:
64      if uwline.first.is_comment:
65        uwline.first.node.value = uwline.first.node.value.rstrip()
66      elif uwline.last.is_comment:
67        uwline.last.node.value = uwline.last.node.value.rstrip()
68      if prev_uwline and prev_uwline.disable:
69        # Keep the vertical spacing between a disabled and enabled formatting
70        # region.
71        _RetainRequiredVerticalSpacingBetweenTokens(uwline.first,
72                                                    prev_uwline.last, lines)
73      if any(tok.is_comment for tok in uwline.tokens):
74        _RetainVerticalSpacingBeforeComments(uwline)
75
76    if (_LineContainsI18n(uwline) or uwline.disable or
77        _LineHasContinuationMarkers(uwline)):
78      _RetainHorizontalSpacing(uwline)
79      _RetainRequiredVerticalSpacing(uwline, prev_uwline, lines)
80      _EmitLineUnformatted(state)
81    elif _CanPlaceOnSingleLine(uwline) and not any(tok.must_split
82                                                   for tok in uwline.tokens):
83      # The unwrapped line fits on one line.
84      while state.next_token:
85        state.AddTokenToState(newline=False, dry_run=False)
86    else:
87      if not _AnalyzeSolutionSpace(state):
88        # Failsafe mode. If there isn't a solution to the line, then just emit
89        # it as is.
90        state = format_decision_state.FormatDecisionState(uwline, indent_amt)
91        state.MoveStateToNextToken()
92        _RetainHorizontalSpacing(uwline)
93        _RetainRequiredVerticalSpacing(uwline, prev_uwline, None)
94        _EmitLineUnformatted(state)
95
96    final_lines.append(uwline)
97    prev_uwline = uwline
98  return _FormatFinalLines(final_lines, verify)
99
100
101def _RetainHorizontalSpacing(uwline):
102  """Retain all horizontal spacing between tokens."""
103  for tok in uwline.tokens:
104    tok.RetainHorizontalSpacing(uwline.first.column, uwline.depth)
105
106
107def _RetainRequiredVerticalSpacing(cur_uwline, prev_uwline, lines):
108  prev_tok = None
109  if prev_uwline is not None:
110    prev_tok = prev_uwline.last
111  for cur_tok in cur_uwline.tokens:
112    _RetainRequiredVerticalSpacingBetweenTokens(cur_tok, prev_tok, lines)
113    prev_tok = cur_tok
114
115
116def _RetainRequiredVerticalSpacingBetweenTokens(cur_tok, prev_tok, lines):
117  """Retain vertical spacing between two tokens if not in editable range."""
118  if prev_tok is None:
119    return
120
121  if prev_tok.is_string:
122    prev_lineno = prev_tok.lineno + prev_tok.value.count('\n')
123  elif prev_tok.is_pseudo_paren:
124    if not prev_tok.previous_token.is_multiline_string:
125      prev_lineno = prev_tok.previous_token.lineno
126    else:
127      prev_lineno = prev_tok.lineno
128  else:
129    prev_lineno = prev_tok.lineno
130
131  if cur_tok.is_comment:
132    cur_lineno = cur_tok.lineno - cur_tok.value.count('\n')
133  else:
134    cur_lineno = cur_tok.lineno
135
136  if prev_tok.value.endswith('\\'):
137    prev_lineno += prev_tok.value.count('\n')
138
139  required_newlines = cur_lineno - prev_lineno
140  if cur_tok.is_comment and not prev_tok.is_comment:
141    # Don't adjust between a comment and non-comment.
142    pass
143  elif lines and (cur_lineno in lines or prev_lineno in lines):
144    desired_newlines = cur_tok.whitespace_prefix.count('\n')
145    whitespace_lines = range(prev_lineno + 1, cur_lineno)
146    deletable_lines = len(lines.intersection(whitespace_lines))
147    required_newlines = max(required_newlines - deletable_lines,
148                            desired_newlines)
149
150  cur_tok.AdjustNewlinesBefore(required_newlines)
151
152
153def _RetainVerticalSpacingBeforeComments(uwline):
154  """Retain vertical spacing before comments."""
155  prev_token = None
156  for tok in uwline.tokens:
157    if tok.is_comment and prev_token:
158      if tok.lineno - tok.value.count('\n') - prev_token.lineno > 1:
159        tok.AdjustNewlinesBefore(ONE_BLANK_LINE)
160
161    prev_token = tok
162
163
164def _EmitLineUnformatted(state):
165  """Emit the line without formatting.
166
167  The line contains code that if reformatted would break a non-syntactic
168  convention. E.g., i18n comments and function calls are tightly bound by
169  convention. Instead, we calculate when / if a newline should occur and honor
170  that. But otherwise the code emitted will be the same as the original code.
171
172  Arguments:
173    state: (format_decision_state.FormatDecisionState) The format decision
174      state.
175  """
176  prev_lineno = None
177  while state.next_token:
178    previous_token = state.next_token.previous_token
179    previous_lineno = previous_token.lineno
180
181    if previous_token.is_multiline_string or previous_token.is_string:
182      previous_lineno += previous_token.value.count('\n')
183
184    if previous_token.is_continuation:
185      newline = False
186    else:
187      newline = (
188          prev_lineno is not None and state.next_token.lineno > previous_lineno)
189
190    prev_lineno = state.next_token.lineno
191    state.AddTokenToState(newline=newline, dry_run=False)
192
193
194def _LineContainsI18n(uwline):
195  """Return true if there are i18n comments or function calls in the line.
196
197  I18n comments and pseudo-function calls are closely related. They cannot
198  be moved apart without breaking i18n.
199
200  Arguments:
201    uwline: (unwrapped_line.UnwrappedLine) The line currently being formatted.
202
203  Returns:
204    True if the line contains i18n comments or function calls. False otherwise.
205  """
206  if style.Get('I18N_COMMENT'):
207    for tok in uwline.tokens:
208      if tok.is_comment and re.match(style.Get('I18N_COMMENT'), tok.value):
209        # Contains an i18n comment.
210        return True
211
212  if style.Get('I18N_FUNCTION_CALL'):
213    length = len(uwline.tokens)
214    index = 0
215    while index < length - 1:
216      if (uwline.tokens[index + 1].value == '(' and
217          uwline.tokens[index].value in style.Get('I18N_FUNCTION_CALL')):
218        return True
219      index += 1
220
221  return False
222
223
224def _LineHasContinuationMarkers(uwline):
225  """Return true if the line has continuation markers in it."""
226  return any(tok.is_continuation for tok in uwline.tokens)
227
228
229def _CanPlaceOnSingleLine(uwline):
230  """Determine if the unwrapped line can go on a single line.
231
232  Arguments:
233    uwline: (unwrapped_line.UnwrappedLine) The line currently being formatted.
234
235  Returns:
236    True if the line can or should be added to a single line. False otherwise.
237  """
238  indent_amt = style.Get('INDENT_WIDTH') * uwline.depth
239  last = uwline.last
240  last_index = -1
241  if last.is_pylint_comment or last.is_pytype_comment:
242    last = last.previous_token
243    last_index = -2
244  if last is None:
245    return True
246  return (last.total_length + indent_amt <= style.Get('COLUMN_LIMIT') and
247          not any(tok.is_comment for tok in uwline.tokens[:last_index]))
248
249
250def _FormatFinalLines(final_lines, verify):
251  """Compose the final output from the finalized lines."""
252  formatted_code = []
253  for line in final_lines:
254    formatted_line = []
255    for tok in line.tokens:
256      if not tok.is_pseudo_paren:
257        formatted_line.append(tok.whitespace_prefix)
258        formatted_line.append(tok.value)
259      else:
260        if (not tok.next_token.whitespace_prefix.startswith('\n') and
261            not tok.next_token.whitespace_prefix.startswith(' ')):
262          if (tok.previous_token.value == ':' or
263              tok.next_token.value not in ',}])'):
264            formatted_line.append(' ')
265
266    formatted_code.append(''.join(formatted_line))
267    if verify:
268      verifier.VerifyCode(formatted_code[-1])
269
270  return ''.join(formatted_code) + '\n'
271
272
273class _StateNode(object):
274  """An edge in the solution space from 'previous.state' to 'state'.
275
276  Attributes:
277    state: (format_decision_state.FormatDecisionState) The format decision state
278      for this node.
279    newline: If True, then on the edge from 'previous.state' to 'state' a
280      newline is inserted.
281    previous: (_StateNode) The previous state node in the graph.
282  """
283
284  # TODO(morbo): Add a '__cmp__' method.
285
286  def __init__(self, state, newline, previous):
287    self.state = state.Clone()
288    self.newline = newline
289    self.previous = previous
290
291  def __repr__(self):  # pragma: no cover
292    return 'StateNode(state=[\n{0}\n], newline={1})'.format(
293        self.state, self.newline)
294
295
296# A tuple of (penalty, count) that is used to prioritize the BFS. In case of
297# equal penalties, we prefer states that were inserted first. During state
298# generation, we make sure that we insert states first that break the line as
299# late as possible.
300_OrderedPenalty = collections.namedtuple('OrderedPenalty', ['penalty', 'count'])
301
302# An item in the prioritized BFS search queue. The 'StateNode's 'state' has
303# the given '_OrderedPenalty'.
304_QueueItem = collections.namedtuple('QueueItem',
305                                    ['ordered_penalty', 'state_node'])
306
307
308def _AnalyzeSolutionSpace(initial_state):
309  """Analyze the entire solution space starting from initial_state.
310
311  This implements a variant of Dijkstra's algorithm on the graph that spans
312  the solution space (LineStates are the nodes). The algorithm tries to find
313  the shortest path (the one with the lowest penalty) from 'initial_state' to
314  the state where all tokens are placed.
315
316  Arguments:
317    initial_state: (format_decision_state.FormatDecisionState) The initial state
318      to start the search from.
319
320  Returns:
321    True if a formatting solution was found. False otherwise.
322  """
323  count = 0
324  seen = set()
325  p_queue = []
326
327  # Insert start element.
328  node = _StateNode(initial_state, False, None)
329  heapq.heappush(p_queue, _QueueItem(_OrderedPenalty(0, count), node))
330
331  count += 1
332  while p_queue:
333    item = p_queue[0]
334    penalty = item.ordered_penalty.penalty
335    node = item.state_node
336    if not node.state.next_token:
337      break
338    heapq.heappop(p_queue)
339
340    if count > 10000:
341      node.state.ignore_stack_for_comparison = True
342
343    if node.state in seen:
344      continue
345
346    seen.add(node.state)
347
348    # FIXME(morbo): Add a 'decision' element?
349
350    count = _AddNextStateToQueue(penalty, node, False, count, p_queue)
351    count = _AddNextStateToQueue(penalty, node, True, count, p_queue)
352
353  if not p_queue:
354    # We weren't able to find a solution. Do nothing.
355    return False
356
357  _ReconstructPath(initial_state, heapq.heappop(p_queue).state_node)
358  return True
359
360
361def _AddNextStateToQueue(penalty, previous_node, newline, count, p_queue):
362  """Add the following state to the analysis queue.
363
364  Assume the current state is 'previous_node' and has been reached with a
365  penalty of 'penalty'. Insert a line break if 'newline' is True.
366
367  Arguments:
368    penalty: (int) The penalty associated with the path up to this point.
369    previous_node: (_StateNode) The last _StateNode inserted into the priority
370      queue.
371    newline: (bool) Add a newline if True.
372    count: (int) The number of elements in the queue.
373    p_queue: (heapq) The priority queue representing the solution space.
374
375  Returns:
376    The updated number of elements in the queue.
377  """
378  must_split = previous_node.state.MustSplit()
379  if newline and not previous_node.state.CanSplit(must_split):
380    # Don't add a newline if the token cannot be split.
381    return count
382  if not newline and must_split:
383    # Don't add a token we must split but where we aren't splitting.
384    return count
385
386  node = _StateNode(previous_node.state, newline, previous_node)
387  penalty += node.state.AddTokenToState(
388      newline=newline, dry_run=True, must_split=must_split)
389  heapq.heappush(p_queue, _QueueItem(_OrderedPenalty(penalty, count), node))
390  return count + 1
391
392
393def _ReconstructPath(initial_state, current):
394  """Reconstruct the path through the queue with lowest penalty.
395
396  Arguments:
397    initial_state: (format_decision_state.FormatDecisionState) The initial state
398      to start the search from.
399    current: (_StateNode) The node in the decision graph that is the end point
400      of the path with the least penalty.
401  """
402  path = collections.deque()
403
404  while current.previous:
405    path.appendleft(current)
406    current = current.previous
407
408  for node in path:
409    initial_state.AddTokenToState(newline=node.newline, dry_run=False)
410
411
412def _FormatFirstToken(first_token, indent_depth, prev_uwline, final_lines):
413  """Format the first token in the unwrapped line.
414
415  Add a newline and the required indent before the first token of the unwrapped
416  line.
417
418  Arguments:
419    first_token: (format_token.FormatToken) The first token in the unwrapped
420      line.
421    indent_depth: (int) The line's indentation depth.
422    prev_uwline: (list of unwrapped_line.UnwrappedLine) The unwrapped line
423      previous to this line.
424    final_lines: (list of unwrapped_line.UnwrappedLine) The unwrapped lines
425      that have already been processed.
426  """
427  first_token.AddWhitespacePrefix(
428      _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline,
429                                 final_lines),
430      indent_level=indent_depth)
431
432
433NO_BLANK_LINES = 1
434ONE_BLANK_LINE = 2
435TWO_BLANK_LINES = 3
436
437
438def _IsClassOrDef(uwline):
439  if uwline.first.value in {'class', 'def'}:
440    return True
441
442  return [t.value for t in uwline.tokens[:2]] == ['async', 'def']
443
444
445def _CalculateNumberOfNewlines(first_token, indent_depth, prev_uwline,
446                               final_lines):
447  """Calculate the number of newlines we need to add.
448
449  Arguments:
450    first_token: (format_token.FormatToken) The first token in the unwrapped
451      line.
452    indent_depth: (int) The line's indentation depth.
453    prev_uwline: (list of unwrapped_line.UnwrappedLine) The unwrapped line
454      previous to this line.
455    final_lines: (list of unwrapped_line.UnwrappedLine) The unwrapped lines
456      that have already been processed.
457
458  Returns:
459    The number of newlines needed before the first token.
460  """
461  # TODO(morbo): Special handling for imports.
462  # TODO(morbo): Create a knob that can tune these.
463  if prev_uwline is None:
464    # The first line in the file. Don't add blank lines.
465    # FIXME(morbo): Is this correct?
466    if first_token.newlines is not None:
467      pytree_utils.SetNodeAnnotation(first_token.node,
468                                     pytree_utils.Annotation.NEWLINES, None)
469    return 0
470
471  if first_token.is_docstring:
472    if (prev_uwline.first.value == 'class' and
473        style.Get('BLANK_LINE_BEFORE_CLASS_DOCSTRING')):
474      # Enforce a blank line before a class's docstring.
475      return ONE_BLANK_LINE
476    elif (prev_uwline.first.value.startswith('#') and
477          style.Get('BLANK_LINE_BEFORE_MODULE_DOCSTRING')):
478      # Enforce a blank line before a module's docstring.
479      return ONE_BLANK_LINE
480    # The docstring shouldn't have a newline before it.
481    return NO_BLANK_LINES
482
483  prev_last_token = prev_uwline.last
484  if prev_last_token.is_docstring:
485    if (not indent_depth and first_token.value in {'class', 'def', 'async'}):
486      # Separate a class or function from the module-level docstring with
487      # appropriate number of blank lines.
488      return 1 + style.Get('BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION')
489    if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token,
490                                       prev_last_token):
491      return NO_BLANK_LINES
492    else:
493      return ONE_BLANK_LINE
494
495  if first_token.value in {'class', 'def', 'async', '@'}:
496    # TODO(morbo): This can go once the blank line calculator is more
497    # sophisticated.
498    if not indent_depth:
499      # This is a top-level class or function.
500      is_inline_comment = prev_last_token.whitespace_prefix.count('\n') == 0
501      if (not prev_uwline.disable and prev_last_token.is_comment and
502          not is_inline_comment):
503        # This token follows a non-inline comment.
504        if _NoBlankLinesBeforeCurrentToken(prev_last_token.value, first_token,
505                                           prev_last_token):
506          # Assume that the comment is "attached" to the current line.
507          # Therefore, we want two blank lines before the comment.
508          index = len(final_lines) - 1
509          while index > 0:
510            if not final_lines[index - 1].is_comment:
511              break
512            index -= 1
513          if final_lines[index - 1].first.value == '@':
514            final_lines[index].first.AdjustNewlinesBefore(NO_BLANK_LINES)
515          else:
516            prev_last_token.AdjustNewlinesBefore(
517                1 + style.Get('BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION'))
518          if first_token.newlines is not None:
519            pytree_utils.SetNodeAnnotation(
520                first_token.node, pytree_utils.Annotation.NEWLINES, None)
521          return NO_BLANK_LINES
522    elif _IsClassOrDef(prev_uwline):
523      if not style.Get('BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF'):
524        pytree_utils.SetNodeAnnotation(first_token.node,
525                                       pytree_utils.Annotation.NEWLINES, None)
526        return NO_BLANK_LINES
527
528  # Calculate how many newlines were between the original lines. We want to
529  # retain that formatting if it doesn't violate one of the style guide rules.
530  if first_token.is_comment:
531    first_token_lineno = first_token.lineno - first_token.value.count('\n')
532  else:
533    first_token_lineno = first_token.lineno
534
535  prev_last_token_lineno = prev_last_token.lineno
536  if prev_last_token.is_multiline_string:
537    prev_last_token_lineno += prev_last_token.value.count('\n')
538
539  if first_token_lineno - prev_last_token_lineno > 1:
540    return ONE_BLANK_LINE
541
542  return NO_BLANK_LINES
543
544
545def _SingleOrMergedLines(uwlines):
546  """Generate the lines we want to format.
547
548  Arguments:
549    uwlines: (list of unwrapped_line.UnwrappedLine) Lines we want to format.
550
551  Yields:
552    Either a single line, if the current line cannot be merged with the
553    succeeding line, or the next two lines merged into one line.
554  """
555  index = 0
556  last_was_merged = False
557  while index < len(uwlines):
558    if uwlines[index].disable:
559      uwline = uwlines[index]
560      index += 1
561      while index < len(uwlines):
562        column = uwline.last.column + 2
563        if uwlines[index].lineno != uwline.lineno:
564          break
565        if uwline.last.value != ':':
566          leaf = pytree.Leaf(
567              type=token.SEMI, value=';', context=('', (uwline.lineno, column)))
568          uwline.AppendToken(format_token.FormatToken(leaf))
569        for tok in uwlines[index].tokens:
570          uwline.AppendToken(tok)
571        index += 1
572      yield uwline
573    elif line_joiner.CanMergeMultipleLines(uwlines[index:], last_was_merged):
574      # TODO(morbo): This splice is potentially very slow. Come up with a more
575      # performance-friendly way of determining if two lines can be merged.
576      next_uwline = uwlines[index + 1]
577      for tok in next_uwline.tokens:
578        uwlines[index].AppendToken(tok)
579      if (len(next_uwline.tokens) == 1 and
580          next_uwline.first.is_multiline_string):
581        # This may be a multiline shebang. In that case, we want to retain the
582        # formatting. Otherwise, it could mess up the shell script's syntax.
583        uwlines[index].disable = True
584      yield uwlines[index]
585      index += 2
586      last_was_merged = True
587    else:
588      yield uwlines[index]
589      index += 1
590      last_was_merged = False
591
592
593def _NoBlankLinesBeforeCurrentToken(text, cur_token, prev_token):
594  """Determine if there are no blank lines before the current token.
595
596  The previous token is a docstring or comment. The prev_token_lineno is the
597  start of the text of that token. Counting the number of newlines in its text
598  gives us the extent and thus where the line number of the end of the
599  docstring or comment. After that, we just compare it to the current token's
600  line number to see if there are blank lines between them.
601
602  Arguments:
603    text: (unicode) The text of the docstring or comment before the current
604      token.
605    cur_token: (format_token.FormatToken) The current token in the unwrapped
606      line.
607    prev_token: (format_token.FormatToken) The previous token in the unwrapped
608      line.
609
610  Returns:
611    True if there is no blank line before the current token.
612  """
613  cur_token_lineno = cur_token.lineno
614  if cur_token.is_comment:
615    cur_token_lineno -= cur_token.value.count('\n')
616  num_newlines = text.count('\n') if not prev_token.is_comment else 0
617  return prev_token.lineno + num_newlines == cur_token_lineno - 1
618