#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2019 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Maps LLVM git SHAs to synthetic revision numbers and back.

Revision numbers are all of the form '(branch_name, r1234)'. As a shorthand,
r1234 is parsed as '(main, 1234)'.
"""


import argparse
import re
import subprocess
import sys
import typing as t


MAIN_BRANCH = "main"

# Note that after base_llvm_sha, we reach The Wild West(TM) of commits.
# So reasonable input that could break us includes:
#
#   Revert foo
#
#   This reverts foo, which had the commit message:
#
#   bar
#   llvm-svn: 375505
#
# While saddening, this is something we should probably try to handle
# reasonably.
base_llvm_revision = 375505
base_llvm_sha = "186155b89c2d2a2f62337081e3ca15f676c9434b"

# Represents an LLVM git checkout:
#  - |dir| is the directory of the LLVM checkout
#  - |remote| is the name of the LLVM remote. Generally it's "origin".
LLVMConfig = t.NamedTuple("LLVMConfig", (("remote", str), ("dir", str)))

# ASCII-only hex digits. str.isdigit() also accepts non-ASCII digits, which
# can never appear in a git SHA.
_HEX_DIGITS = frozenset("0123456789abcdefABCDEF")

# Length of a full (unabbreviated) SHA-1 object name, as in |base_llvm_sha|.
_FULL_SHA_LENGTH = 40


class Rev(t.NamedTuple("Rev", (("branch", str), ("number", int)))):
    """Represents an LLVM 'revision', a shorthand that identifies a commit."""

    @staticmethod
    def parse(rev: str) -> "Rev":
        """Parses a Rev from the given string.

        Accepted forms are 'r1234' (shorthand for the main branch) and
        '(branch_name, r1234)'. Surrounding whitespace is ignored.

        Raises a ValueError on a failed parse.
        """
        # Revs are parsed into (${branch_name}, r${commits_since_base_commit})
        # pairs.
        #
        # We support r${commits_since_base_commit} as shorthand for
        # (main, r${commits_since_base_commit}).
        text = rev.strip()

        # fullmatch() rejects trailing junk and signed/underscored numbers
        # that a bare int() conversion would have let through.
        shorthand = re.fullmatch(r"r([0-9]+)", text)
        if shorthand:
            return Rev(branch=MAIN_BRANCH, number=int(shorthand.group(1)))

        match = re.fullmatch(r"\((.+), r([0-9]+)\)", text)
        if not match:
            raise ValueError("%r isn't a valid revision" % rev)

        branch_name, rev_string = match.groups()
        return Rev(branch=branch_name, number=int(rev_string))

    def __str__(self) -> str:
        """Formats this Rev in the same syntax that parse() accepts."""
        branch_name, number = self
        if branch_name == MAIN_BRANCH:
            return "r%d" % number
        return "(%s, r%d)" % (branch_name, number)


def is_git_sha(xs: str) -> bool:
    """Returns whether the given string looks like a valid git commit SHA.

    A string qualifies if it is 7 to 40 characters long and consists only of
    ASCII hexadecimal digits (either case).
    """
    return 6 < len(xs) <= _FULL_SHA_LENGTH and all(
        x in _HEX_DIGITS for x in xs
    )


def check_output(command: t.List[str], cwd: str) -> str:
    """Shorthand for subprocess.check_output. Auto-decodes any stdout.

    Raises a subprocess.CalledProcessError if the command exits non-zero.
    """
    result = subprocess.run(
        command,
        cwd=cwd,
        check=True,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        encoding="utf-8",
    )
    return result.stdout


def _count_first_parent_commits(
    llvm_config: LLVMConfig, start: str, end: str
) -> int:
    """Returns the number of first-parent commits in the range start..end."""
    result = check_output(
        [
            "git",
            "rev-list",
            "--count",
            "--first-parent",
            f"{start}..{end}",
        ],
        cwd=llvm_config.dir,
    )
    return int(result.strip())


def translate_prebase_sha_to_rev_number(
    llvm_config: LLVMConfig, sha: str
) -> int:
    """Translates a sha to a revision number (e.g., "llvm-svn: 1234").

    This function assumes that the given SHA is an ancestor of |base_llvm_sha|.

    Raises a ValueError if the last line of the commit message isn't an
    llvm-svn line.
    """
    commit_message = check_output(
        ["git", "log", "-n1", "--format=%B", sha],
        cwd=llvm_config.dir,
    )
    message_lines = commit_message.strip().splitlines()
    # An empty commit message has no last line to inspect; treat it like any
    # other commit that lacks an llvm-svn trailer.
    svn_match = (
        re.match(r"^llvm-svn: (\d+)$", message_lines[-1])
        if message_lines
        else None
    )

    if not svn_match:
        raise ValueError(
            f"No llvm-svn line found for {sha}, which... shouldn't happen?"
        )

    return int(svn_match.group(1))


def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev:
    """Translates a sha or git ref to a Rev.

    Raises a ValueError if the commit isn't on main and either no branch of
    |llvm_config.remote| contains it, or more than one does.
    """

    if is_git_sha(sha_or_ref) and len(sha_or_ref) == _FULL_SHA_LENGTH:
        # git prints SHAs in lowercase; normalize so the equality checks
        # against |merge_base| below behave.
        sha = sha_or_ref.lower()
    else:
        # Refs *and* abbreviated SHAs are expanded to the full SHA, since
        # |sha| is compared for equality with git's full-length output.
        sha = check_output(
            ["git", "rev-parse", sha_or_ref],
            cwd=llvm_config.dir,
        )
        sha = sha.strip()

    merge_base = check_output(
        ["git", "merge-base", base_llvm_sha, sha],
        cwd=llvm_config.dir,
    )
    merge_base = merge_base.strip()

    if merge_base == base_llvm_sha:
        count = _count_first_parent_commits(llvm_config, base_llvm_sha, sha)
        return Rev(branch=MAIN_BRANCH, number=count + base_llvm_revision)

    # Otherwise, either:
    # - |merge_base| is |sha| (we have a guaranteed llvm-svn number on |sha|)
    # - |merge_base| is neither (we have a guaranteed llvm-svn number on
    #                            |merge_base|, but not |sha|)
    merge_base_number = translate_prebase_sha_to_rev_number(
        llvm_config, merge_base
    )
    if merge_base == sha:
        return Rev(branch=MAIN_BRANCH, number=merge_base_number)

    distance_from_base = _count_first_parent_commits(
        llvm_config, merge_base, sha
    )
    revision_number = merge_base_number + distance_from_base

    branches_containing = check_output(
        ["git", "branch", "-r", "--contains", sha],
        cwd=llvm_config.dir,
    )

    # Only branches that live on the configured remote are of interest; they
    # are recorded with the "<remote>/" prefix removed.
    prefix = llvm_config.remote + "/"
    candidates = []
    for branch in branches_containing.splitlines():
        branch = branch.strip()
        if branch.startswith(prefix):
            candidates.append(branch[len(prefix) :])

    if not candidates:
        raise ValueError(
            f"No viable branches found from {llvm_config.remote} with {sha}"
        )

    # It seems that some `origin/release/.*` branches have
    # `origin/upstream/release/.*` equivalents, which is... awkward to deal
    # with. Prefer the latter, since that seems to have newer commits than the
    # former. Technically n^2, but len(elements) should be like, tens in the
    # worst case.
    candidates = [x for x in candidates if f"upstream/{x}" not in candidates]
    if len(candidates) != 1:
        raise ValueError(
            f"Ambiguity: multiple branches from {llvm_config.remote} have {sha}: "
            f"{sorted(candidates)}"
        )

    return Rev(branch=candidates[0], number=revision_number)


def parse_git_commit_messages(
    stream: t.Iterable[str], separator: str
) -> t.Iterable[t.Tuple[str, str]]:
    """Parses a stream of git log messages.

    These are expected to be in the format:

    40 character sha
    commit
    message
    body
    separator
    40 character sha
    commit
    message
    body
    separator

    Yields (sha, message) pairs, where |message| is the concatenation of the
    lines between the sha line and the separator line.
    """

    lines = iter(stream)
    while True:
        # Looks like a potential bug in pylint? crbug.com/1041148
        # pylint: disable=stop-iteration-return
        sha = next(lines, None)
        if sha is None:
            return

        sha = sha.strip()
        assert is_git_sha(sha), f"Invalid git SHA: {sha}"

        message = []
        # This consumes from the shared iterator, so the outer loop resumes
        # right after the separator line.
        for line in lines:
            if line.strip() == separator:
                break
            message.append(line)

        yield sha, "".join(message)


def translate_prebase_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
    """Translates a Rev to a SHA.

    This function assumes that the given rev refers to a commit that's an
    ancestor of |base_llvm_sha|.

    Raises a subprocess.CalledProcessError if git exits non-zero without a
    match having been found, and a ValueError if no commit matches.
    """
    # Because reverts may include reverted commit messages, we can't just |-n1|
    # and pick that.
    separator = ">!" * 80
    looking_for = f"llvm-svn: {rev.number}"

    git_command = [
        "git",
        "log",
        "--grep",
        f"^{looking_for}$",
        f"--format=%H%n%B{separator}",
        base_llvm_sha,
    ]

    subp = subprocess.Popen(
        git_command,
        cwd=llvm_config.dir,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        encoding="utf-8",
    )

    with subp:
        for sha, message in parse_git_commit_messages(subp.stdout, separator):
            # Only a trailing llvm-svn line counts; a match in the middle of
            # the message comes from a quoted (e.g. reverted) commit.
            message_lines = message.splitlines()
            if message_lines and message_lines[-1].strip() == looking_for:
                subp.terminate()
                return sha

    # Leaving the |with| block waits for git, so |returncode| is set here.
    if subp.returncode:
        raise subprocess.CalledProcessError(subp.returncode, git_command)
    raise ValueError(f"No commit with revision {rev} found")


def translate_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
    """Translates a Rev to a SHA.

    Raises a ValueError if the given Rev doesn't exist in the given config.
    """
    branch, number = rev

    if branch == MAIN_BRANCH:
        if number < base_llvm_revision:
            return translate_prebase_rev_to_sha(llvm_config, rev)
        base_sha = base_llvm_sha
        base_revision_number = base_llvm_revision
    else:
        base_sha = check_output(
            [
                "git",
                "merge-base",
                base_llvm_sha,
                f"{llvm_config.remote}/{branch}",
            ],
            cwd=llvm_config.dir,
        )
        base_sha = base_sha.strip()
        if base_sha == base_llvm_sha:
            base_revision_number = base_llvm_revision
        else:
            base_revision_number = translate_prebase_sha_to_rev_number(
                llvm_config, base_sha
            )

    # Alternatively, we could |git log --format=%H|, but git is *super* fast
    # about rev walking/counting locally compared to long |log|s, so we walk
    # back twice.
    head = check_output(
        ["git", "rev-parse", f"{llvm_config.remote}/{branch}"],
        cwd=llvm_config.dir,
    )
    branch_head_sha = head.strip()

    commit_number = number - base_revision_number
    revs_between = _count_first_parent_commits(
        llvm_config, base_sha, branch_head_sha
    )

    commits_behind_head = revs_between - commit_number
    if commits_behind_head < 0:
        raise ValueError(
            f"Revision {rev} is past {llvm_config.remote}/{branch}. Try updating "
            "your tree?"
        )

    result = check_output(
        ["git", "rev-parse", f"{branch_head_sha}~{commits_behind_head}"],
        cwd=llvm_config.dir,
    )

    return result.strip()


def find_root_llvm_dir(root_dir: str = ".") -> str:
    """Finds the root of an LLVM directory starting at |root_dir|.

    Raises a subprocess.CalledProcessError if no git directory is found.
    """
    result = check_output(
        ["git", "rev-parse", "--show-toplevel"],
        cwd=root_dir,
    )
    return result.strip()


def main(argv: t.List[str]) -> None:
    """Parses |argv|, performs the requested translation, prints the result."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--llvm_dir",
        help="LLVM directory to consult for git history, etc. Autodetected "
        "if cwd is inside of an LLVM tree",
    )
    parser.add_argument(
        "--upstream",
        default="origin",
        help="LLVM upstream's remote name. Defaults to %(default)s.",
    )
    sha_or_rev = parser.add_mutually_exclusive_group(required=True)
    sha_or_rev.add_argument(
        "--sha", help="A git SHA (or ref) to convert to a rev"
    )
    sha_or_rev.add_argument("--rev", help="A rev to convert into a sha")
    opts = parser.parse_args(argv)

    llvm_dir = opts.llvm_dir
    if llvm_dir is None:
        try:
            llvm_dir = find_root_llvm_dir()
        except subprocess.CalledProcessError:
            # parser.error() prints the message and exits.
            parser.error(
                "Couldn't autodetect an LLVM tree; please use --llvm_dir"
            )

    # Reuse the directory resolved above rather than running the detection a
    # second time.
    config = LLVMConfig(
        remote=opts.upstream,
        dir=llvm_dir,
    )

    if opts.sha:
        rev = translate_sha_to_rev(config, opts.sha)
        print(rev)
    else:
        sha = translate_rev_to_sha(config, Rev.parse(opts.rev))
        print(sha)


if __name__ == "__main__":
    main(sys.argv[1:])