1#!/usr/bin/env python3 2# -*- coding: utf-8 -*- 3#===----------------------------------------------------------------------===## 4# 5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 6# See https://llvm.org/LICENSE.txt for license information. 7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 8# 9#===----------------------------------------------------------------------===## 10# 11# !!!!!!!!!!!! NOTE !!!!!!!!!!!! 12# This is copied directly from upstream LLVM. Please make any changes upstream, 13# rather than to this file directly. Once changes are made there, you're free 14# to integrate them here. 15 16"""Checks for reverts of commits across a given git commit. 17 18To clarify the meaning of 'across' with an example, if we had the following 19commit history (where `a -> b` notes that `b` is a direct child of `a`): 20 21123abc -> 223abc -> 323abc -> 423abc -> 523abc 22 23And where 423abc is a revert of 223abc, this revert is considered to be 'across' 24323abc. More generally, a revert A of a parent commit B is considered to be 25'across' a commit C if C is a parent of A and B is a parent of C. 26 27Please note that revert detection in general is really difficult, since merge 28conflicts/etc always introduce _some_ amount of fuzziness. This script just 29uses a bundle of heuristics, and is bound to ignore / incorrectly flag some 30reverts. The hope is that it'll easily catch the vast majority (>90%) of them, 31though. 32 33This is designed to be used in one of two ways: an import in Python, or run 34directly from a shell. If you want to import this, the `find_reverts` 35function is the thing to look at. If you'd rather use this from a shell, have a 36usage example: 37 38``` 39./revert_checker.py c47f97169 origin/main origin/release/12.x 40``` 41 42This checks for all reverts from the tip of origin/main to c47f97169, which are 43across the latter. It then does the same for origin/release/12.x to c47f97169. 44Duplicate reverts discovered when walking both roots (origin/main and 45origin/release/12.x) are deduplicated in output. 46""" 47 48import argparse 49import collections 50import logging 51import re 52import subprocess 53import sys 54from typing import Generator, List, NamedTuple, Iterable 55 56assert sys.version_info >= (3, 6), 'Only Python 3.6+ is supported.' 57 58# People are creative with their reverts, and heuristics are a bit difficult. 59# Like 90% of of reverts have "This reverts commit ${full_sha}". 60# Some lack that entirely, while others have many of them specified in ad-hoc 61# ways, while others use short SHAs and whatever. 62# 63# The 90% case is trivial to handle (and 100% free + automatic). The extra 10% 64# starts involving human intervention, which is probably not worth it for now. 65 66 67def _try_parse_reverts_from_commit_message(commit_message: str) -> List[str]: 68 if not commit_message: 69 return [] 70 71 results = re.findall(r'This reverts commit ([a-f0-9]{40})\b', commit_message) 72 73 first_line = commit_message.splitlines()[0] 74 initial_revert = re.match(r'Revert ([a-f0-9]{6,}) "', first_line) 75 if initial_revert: 76 results.append(initial_revert.group(1)) 77 return results 78 79 80def _stream_stdout(command: List[str]) -> Generator[str, None, None]: 81 with subprocess.Popen( 82 command, stdout=subprocess.PIPE, encoding='utf-8', errors='replace') as p: 83 assert p.stdout is not None # for mypy's happiness. 84 yield from p.stdout 85 86 87def _resolve_sha(git_dir: str, sha: str) -> str: 88 if len(sha) == 40: 89 return sha 90 91 return subprocess.check_output( 92 ['git', '-C', git_dir, 'rev-parse', sha], 93 encoding='utf-8', 94 stderr=subprocess.DEVNULL, 95 ).strip() 96 97 98_LogEntry = NamedTuple('_LogEntry', [ 99 ('sha', str), 100 ('commit_message', str), 101]) 102 103 104def _log_stream(git_dir: str, root_sha: str, 105 end_at_sha: str) -> Iterable[_LogEntry]: 106 sep = 50 * '<>' 107 log_command = [ 108 'git', 109 '-C', 110 git_dir, 111 'log', 112 '^' + end_at_sha, 113 root_sha, 114 '--format=' + sep + '%n%H%n%B%n', 115 ] 116 117 stdout_stream = iter(_stream_stdout(log_command)) 118 119 # Find the next separator line. If there's nothing to log, it may not exist. 120 # It might not be the first line if git feels complainy. 121 found_commit_header = False 122 for line in stdout_stream: 123 if line.rstrip() == sep: 124 found_commit_header = True 125 break 126 127 while found_commit_header: 128 sha = next(stdout_stream, None) 129 assert sha is not None, 'git died?' 130 sha = sha.rstrip() 131 132 commit_message = [] 133 134 found_commit_header = False 135 for line in stdout_stream: 136 line = line.rstrip() 137 if line.rstrip() == sep: 138 found_commit_header = True 139 break 140 commit_message.append(line) 141 142 yield _LogEntry(sha, '\n'.join(commit_message).rstrip()) 143 144 145def _shas_between(git_dir: str, base_ref: str, head_ref: str) -> Iterable[str]: 146 rev_list = [ 147 'git', 148 '-C', 149 git_dir, 150 'rev-list', 151 '--first-parent', 152 f'{base_ref}..{head_ref}', 153 ] 154 return (x.strip() for x in _stream_stdout(rev_list)) 155 156 157def _rev_parse(git_dir: str, ref: str) -> str: 158 return subprocess.check_output( 159 ['git', '-C', git_dir, 'rev-parse', ref], 160 encoding='utf-8', 161 ).strip() 162 163 164Revert = NamedTuple('Revert', [ 165 ('sha', str), 166 ('reverted_sha', str), 167]) 168 169 170def _find_common_parent_commit(git_dir: str, ref_a: str, ref_b: str) -> str: 171 """Finds the closest common parent commit between `ref_a` and `ref_b`.""" 172 return subprocess.check_output( 173 ['git', '-C', git_dir, 'merge-base', ref_a, ref_b], 174 encoding='utf-8', 175 ).strip() 176 177 178def find_reverts(git_dir: str, across_ref: str, root: str) -> List[Revert]: 179 """Finds reverts across `across_ref` in `git_dir`, starting from `root`. 180 181 These reverts are returned in order of oldest reverts first. 182 """ 183 across_sha = _rev_parse(git_dir, across_ref) 184 root_sha = _rev_parse(git_dir, root) 185 186 common_ancestor = _find_common_parent_commit(git_dir, across_sha, root_sha) 187 if common_ancestor != across_sha: 188 raise ValueError(f"{across_sha} isn't an ancestor of {root_sha} " 189 '(common ancestor: {common_ancestor})') 190 191 intermediate_commits = set(_shas_between(git_dir, across_sha, root_sha)) 192 assert across_sha not in intermediate_commits 193 194 logging.debug('%d commits appear between %s and %s', 195 len(intermediate_commits), across_sha, root_sha) 196 197 all_reverts = [] 198 for sha, commit_message in _log_stream(git_dir, root_sha, across_sha): 199 reverts = _try_parse_reverts_from_commit_message(commit_message) 200 if not reverts: 201 continue 202 203 resolved_reverts = sorted(set(_resolve_sha(git_dir, x) for x in reverts)) 204 for reverted_sha in resolved_reverts: 205 if reverted_sha in intermediate_commits: 206 logging.debug('Commit %s reverts %s, which happened after %s', sha, 207 reverted_sha, across_sha) 208 continue 209 210 try: 211 object_type = subprocess.check_output( 212 ['git', '-C', git_dir, 'cat-file', '-t', reverted_sha], 213 encoding='utf-8', 214 stderr=subprocess.DEVNULL, 215 ).strip() 216 except subprocess.CalledProcessError: 217 logging.warning( 218 'Failed to resolve reverted object %s (claimed to be reverted ' 219 'by sha %s)', reverted_sha, sha) 220 continue 221 222 if object_type == 'commit': 223 all_reverts.append(Revert(sha, reverted_sha)) 224 continue 225 226 logging.error("%s claims to revert %s -- which isn't a commit -- %s", sha, 227 object_type, reverted_sha) 228 229 # Since `all_reverts` contains reverts in log order (e.g., newer comes before 230 # older), we need to reverse this to keep with our guarantee of older = 231 # earlier in the result. 232 all_reverts.reverse() 233 return all_reverts 234 235 236def _main() -> None: 237 parser = argparse.ArgumentParser( 238 description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) 239 parser.add_argument( 240 'base_ref', help='Git ref or sha to check for reverts around.') 241 parser.add_argument( 242 '-C', '--git_dir', default='.', help='Git directory to use.') 243 parser.add_argument( 244 'root', nargs='+', help='Root(s) to search for commits from.') 245 parser.add_argument('--debug', action='store_true') 246 opts = parser.parse_args() 247 248 logging.basicConfig( 249 format='%(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: %(message)s', 250 level=logging.DEBUG if opts.debug else logging.INFO, 251 ) 252 253 # `root`s can have related history, so we want to filter duplicate commits 254 # out. The overwhelmingly common case is also to have one root, and it's way 255 # easier to reason about output that comes in an order that's meaningful to 256 # git. 257 seen_reverts = set() 258 all_reverts = [] 259 for root in opts.root: 260 for revert in find_reverts(opts.git_dir, opts.base_ref, root): 261 if revert not in seen_reverts: 262 seen_reverts.add(revert) 263 all_reverts.append(revert) 264 265 for revert in all_reverts: 266 print(f'{revert.sha} claims to revert {revert.reverted_sha}') 267 268 269if __name__ == '__main__': 270 _main() 271