• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#===----------------------------------------------------------------------===##
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9#===----------------------------------------------------------------------===##
10#
11# !!!!!!!!!!!! NOTE !!!!!!!!!!!!
12# This is copied directly from upstream LLVM. Please make any changes upstream,
13# rather than to this file directly. Once changes are made there, you're free
14# to integrate them here.
15
16"""Checks for reverts of commits across a given git commit.
17
18To clarify the meaning of 'across' with an example, if we had the following
19commit history (where `a -> b` notes that `b` is a direct child of `a`):
20
21123abc -> 223abc -> 323abc -> 423abc -> 523abc
22
23And where 423abc is a revert of 223abc, this revert is considered to be 'across'
24323abc. More generally, a revert A of a parent commit B is considered to be
25'across' a commit C if C is a parent of A and B is a parent of C.
26
27Please note that revert detection in general is really difficult, since merge
28conflicts/etc always introduce _some_ amount of fuzziness. This script just
29uses a bundle of heuristics, and is bound to ignore / incorrectly flag some
30reverts. The hope is that it'll easily catch the vast majority (>90%) of them,
31though.
32
33This is designed to be used in one of two ways: an import in Python, or run
34directly from a shell. If you want to import this, the `find_reverts`
35function is the thing to look at. If you'd rather use this from a shell, have a
36usage example:
37
38```
39./revert_checker.py c47f97169 origin/main origin/release/12.x
40```
41
42This checks for all reverts from the tip of origin/main to c47f97169, which are
43across the latter. It then does the same for origin/release/12.x to c47f97169.
44Duplicate reverts discovered when walking both roots (origin/main and
45origin/release/12.x) are deduplicated in output.
46"""
47
48import argparse
49import collections
50import logging
51import re
52import subprocess
53import sys
54from typing import Generator, List, NamedTuple, Iterable
55
56assert sys.version_info >= (3, 6), 'Only Python 3.6+ is supported.'
57
58# People are creative with their reverts, and heuristics are a bit difficult.
59# Like 90% of of reverts have "This reverts commit ${full_sha}".
60# Some lack that entirely, while others have many of them specified in ad-hoc
61# ways, while others use short SHAs and whatever.
62#
63# The 90% case is trivial to handle (and 100% free + automatic). The extra 10%
64# starts involving human intervention, which is probably not worth it for now.
65
66
67def _try_parse_reverts_from_commit_message(commit_message: str) -> List[str]:
68  if not commit_message:
69    return []
70
71  results = re.findall(r'This reverts commit ([a-f0-9]{40})\b', commit_message)
72
73  first_line = commit_message.splitlines()[0]
74  initial_revert = re.match(r'Revert ([a-f0-9]{6,}) "', first_line)
75  if initial_revert:
76    results.append(initial_revert.group(1))
77  return results
78
79
80def _stream_stdout(command: List[str]) -> Generator[str, None, None]:
81  with subprocess.Popen(
82      command, stdout=subprocess.PIPE, encoding='utf-8', errors='replace') as p:
83    assert p.stdout is not None  # for mypy's happiness.
84    yield from p.stdout
85
86
87def _resolve_sha(git_dir: str, sha: str) -> str:
88  if len(sha) == 40:
89    return sha
90
91  return subprocess.check_output(
92      ['git', '-C', git_dir, 'rev-parse', sha],
93      encoding='utf-8',
94      stderr=subprocess.DEVNULL,
95  ).strip()
96
97
98_LogEntry = NamedTuple('_LogEntry', [
99    ('sha', str),
100    ('commit_message', str),
101])
102
103
104def _log_stream(git_dir: str, root_sha: str,
105                end_at_sha: str) -> Iterable[_LogEntry]:
106  sep = 50 * '<>'
107  log_command = [
108      'git',
109      '-C',
110      git_dir,
111      'log',
112      '^' + end_at_sha,
113      root_sha,
114      '--format=' + sep + '%n%H%n%B%n',
115  ]
116
117  stdout_stream = iter(_stream_stdout(log_command))
118
119  # Find the next separator line. If there's nothing to log, it may not exist.
120  # It might not be the first line if git feels complainy.
121  found_commit_header = False
122  for line in stdout_stream:
123    if line.rstrip() == sep:
124      found_commit_header = True
125      break
126
127  while found_commit_header:
128    sha = next(stdout_stream, None)
129    assert sha is not None, 'git died?'
130    sha = sha.rstrip()
131
132    commit_message = []
133
134    found_commit_header = False
135    for line in stdout_stream:
136      line = line.rstrip()
137      if line.rstrip() == sep:
138        found_commit_header = True
139        break
140      commit_message.append(line)
141
142    yield _LogEntry(sha, '\n'.join(commit_message).rstrip())
143
144
145def _shas_between(git_dir: str, base_ref: str, head_ref: str) -> Iterable[str]:
146  rev_list = [
147      'git',
148      '-C',
149      git_dir,
150      'rev-list',
151      '--first-parent',
152      f'{base_ref}..{head_ref}',
153  ]
154  return (x.strip() for x in _stream_stdout(rev_list))
155
156
157def _rev_parse(git_dir: str, ref: str) -> str:
158  return subprocess.check_output(
159      ['git', '-C', git_dir, 'rev-parse', ref],
160      encoding='utf-8',
161  ).strip()
162
163
164Revert = NamedTuple('Revert', [
165    ('sha', str),
166    ('reverted_sha', str),
167])
168
169
170def _find_common_parent_commit(git_dir: str, ref_a: str, ref_b: str) -> str:
171  """Finds the closest common parent commit between `ref_a` and `ref_b`."""
172  return subprocess.check_output(
173      ['git', '-C', git_dir, 'merge-base', ref_a, ref_b],
174      encoding='utf-8',
175  ).strip()
176
177
178def find_reverts(git_dir: str, across_ref: str, root: str) -> List[Revert]:
179  """Finds reverts across `across_ref` in `git_dir`, starting from `root`.
180
181  These reverts are returned in order of oldest reverts first.
182  """
183  across_sha = _rev_parse(git_dir, across_ref)
184  root_sha = _rev_parse(git_dir, root)
185
186  common_ancestor = _find_common_parent_commit(git_dir, across_sha, root_sha)
187  if common_ancestor != across_sha:
188    raise ValueError(f"{across_sha} isn't an ancestor of {root_sha} "
189                     '(common ancestor: {common_ancestor})')
190
191  intermediate_commits = set(_shas_between(git_dir, across_sha, root_sha))
192  assert across_sha not in intermediate_commits
193
194  logging.debug('%d commits appear between %s and %s',
195                len(intermediate_commits), across_sha, root_sha)
196
197  all_reverts = []
198  for sha, commit_message in _log_stream(git_dir, root_sha, across_sha):
199    reverts = _try_parse_reverts_from_commit_message(commit_message)
200    if not reverts:
201      continue
202
203    resolved_reverts = sorted(set(_resolve_sha(git_dir, x) for x in reverts))
204    for reverted_sha in resolved_reverts:
205      if reverted_sha in intermediate_commits:
206        logging.debug('Commit %s reverts %s, which happened after %s', sha,
207                      reverted_sha, across_sha)
208        continue
209
210      try:
211        object_type = subprocess.check_output(
212            ['git', '-C', git_dir, 'cat-file', '-t', reverted_sha],
213            encoding='utf-8',
214            stderr=subprocess.DEVNULL,
215        ).strip()
216      except subprocess.CalledProcessError:
217        logging.warning(
218            'Failed to resolve reverted object %s (claimed to be reverted '
219            'by sha %s)', reverted_sha, sha)
220        continue
221
222      if object_type == 'commit':
223        all_reverts.append(Revert(sha, reverted_sha))
224        continue
225
226      logging.error("%s claims to revert %s -- which isn't a commit -- %s", sha,
227                    object_type, reverted_sha)
228
229  # Since `all_reverts` contains reverts in log order (e.g., newer comes before
230  # older), we need to reverse this to keep with our guarantee of older =
231  # earlier in the result.
232  all_reverts.reverse()
233  return all_reverts
234
235
236def _main() -> None:
237  parser = argparse.ArgumentParser(
238      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
239  parser.add_argument(
240      'base_ref', help='Git ref or sha to check for reverts around.')
241  parser.add_argument(
242      '-C', '--git_dir', default='.', help='Git directory to use.')
243  parser.add_argument(
244      'root', nargs='+', help='Root(s) to search for commits from.')
245  parser.add_argument('--debug', action='store_true')
246  opts = parser.parse_args()
247
248  logging.basicConfig(
249      format='%(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: %(message)s',
250      level=logging.DEBUG if opts.debug else logging.INFO,
251  )
252
253  # `root`s can have related history, so we want to filter duplicate commits
254  # out. The overwhelmingly common case is also to have one root, and it's way
255  # easier to reason about output that comes in an order that's meaningful to
256  # git.
257  seen_reverts = set()
258  all_reverts = []
259  for root in opts.root:
260    for revert in find_reverts(opts.git_dir, opts.base_ref, root):
261      if revert not in seen_reverts:
262        seen_reverts.add(revert)
263        all_reverts.append(revert)
264
265  for revert in all_reverts:
266    print(f'{revert.sha} claims to revert {revert.reverted_sha}')
267
268
269if __name__ == '__main__':
270  _main()
271