• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3# Copyright 2019 The ChromiumOS Authors
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Maps LLVM git SHAs to synthetic revision numbers and back.
8
Revision numbers are all of the form '(branch_name, r1234)'. As a shorthand,
r1234 is parsed as '(main, r1234)'.
11"""
12
13
14import argparse
15import re
16import subprocess
17import sys
18import typing as t
19
20
# Branch that the bare `r1234` shorthand refers to.
MAIN_BRANCH = "main"

# Commits after |base_llvm_sha| are The Wild West(TM): their messages can
# contain anything. In particular, a revert may quote the message of the
# commit it reverts, including that commit's svn trailer, e.g.:
#
#   Revert foo
#
#   This reverts foo, which had the commit message:
#
#   bar
#   llvm-svn: 375505
#
# That's unfortunate, but it is input we should try to cope with sensibly.
base_llvm_revision = 375505
base_llvm_sha = "186155b89c2d2a2f62337081e3ca15f676c9434b"


# Represents an LLVM git checkout.
class LLVMConfig(t.NamedTuple):
    # Name of the LLVM remote. Generally it's "origin".
    remote: str
    # Directory of the LLVM checkout.
    dir: str
42
43
class Rev(t.NamedTuple("Rev", (("branch", str), ("number", int)))):
    """Represents an LLVM 'revision': a shorthand that identifies a commit.

    A Rev is a (branch, number) pair. Its string form is `r<number>` for the
    main branch and `(<branch>, r<number>)` for any other branch.
    """

    @staticmethod
    def parse(rev: str) -> "Rev":
        """Parses a Rev from the given string.

        Accepts either `(${branch_name}, r${number})` or the shorthand
        `r${number}`, which means `(main, r${number})`.

        Raises a ValueError on a failed parse.
        """
        if rev.startswith("r"):
            # Shorthand form; int() raises ValueError on a malformed number.
            return Rev(branch=MAIN_BRANCH, number=int(rev[1:]))

        # fullmatch (rather than match) so that trailing garbage after the
        # closing paren is rejected instead of being silently ignored.
        match = re.fullmatch(r"\((.+), r(\d+)\)", rev)
        if not match:
            raise ValueError("%r isn't a valid revision" % rev)

        branch_name, rev_string = match.groups()
        return Rev(branch=branch_name, number=int(rev_string))

    def __str__(self) -> str:
        """Formats this Rev in a form that parse() accepts."""
        branch_name, number = self
        if branch_name == MAIN_BRANCH:
            return "r%d" % number
        return "(%s, r%d)" % (branch_name, number)
75
76
def is_git_sha(xs: str) -> bool:
    """Returns whether the given string looks like a valid git commit SHA.

    A string qualifies if it is 7 to 40 characters long and consists solely
    of ASCII hexadecimal digits (either case). This is purely syntactic; no
    repository is consulted.
    """
    if not 7 <= len(xs) <= 40:
        return False
    # Test against an explicit ASCII set: str.isdigit() is also true for
    # non-ASCII digits, which can never appear in a SHA.
    return all(x in "0123456789abcdefABCDEF" for x in xs)
84
85
def check_output(command: t.List[str], cwd: str) -> str:
    """Runs |command| inside |cwd| and returns its stdout as text.

    stdout is decoded as UTF-8 and stdin is connected to the null device.
    Raises a subprocess.CalledProcessError if the command exits non-zero.
    """
    return subprocess.check_output(
        command,
        cwd=cwd,
        stdin=subprocess.DEVNULL,
        encoding="utf-8",
    )
97
98
def translate_prebase_sha_to_rev_number(
    llvm_config: LLVMConfig, sha: str
) -> int:
    """Translates a sha to a revision number (e.g., "llvm-svn: 1234").

    The number is read from the last line of the commit's message, which
    must be exactly `llvm-svn: <digits>`.

    This function assumes that the given SHA is an ancestor of |base_llvm_sha|.

    Raises a ValueError if the commit message doesn't end in such a line.
    """
    commit_message = check_output(
        ["git", "log", "-n1", "--format=%B", sha],
        cwd=llvm_config.dir,
    )
    message_lines = commit_message.strip().splitlines()

    # An empty message has no last line to inspect; treat it like any other
    # commit that lacks the trailer rather than failing with an IndexError.
    svn_match = None
    if message_lines:
        svn_match = re.match(r"^llvm-svn: (\d+)$", message_lines[-1])

    if not svn_match:
        raise ValueError(
            f"No llvm-svn line found for {sha}, which... shouldn't happen?"
        )

    return int(svn_match.group(1))
119
120
def translate_sha_to_rev(llvm_config: LLVMConfig, sha_or_ref: str) -> Rev:
    """Translates a sha or git ref to a Rev.

    |sha_or_ref| may be a full SHA, an abbreviated SHA, or anything else
    `git rev-parse` can resolve.

    If the commit descends from |base_llvm_sha|, the result is a main-branch
    Rev numbered by first-parent distance from the base. Otherwise the number
    is derived from the llvm-svn number of the merge base with
    |base_llvm_sha|, and the branch is the single remote branch containing
    the commit.

    Raises a ValueError if no branch, or more than one branch, of
    |llvm_config.remote| contains the commit, and a
    subprocess.CalledProcessError if a git invocation fails.
    """

    def count_first_parent_commits(base: str, tip: str) -> int:
        """Counts first-parent commits reachable from tip but not base."""
        output = check_output(
            [
                "git",
                "rev-list",
                "--count",
                "--first-parent",
                f"{base}..{tip}",
            ],
            cwd=llvm_config.dir,
        )
        return int(output.strip())

    # |sha| is compared against git's own output below, which is always a
    # full lowercase SHA. Only a full-length SHA can be used as given;
    # abbreviated SHAs and refs must be expanded by git first.
    if is_git_sha(sha_or_ref) and len(sha_or_ref) == 40:
        sha = sha_or_ref.lower()
    else:
        sha = check_output(
            ["git", "rev-parse", sha_or_ref],
            cwd=llvm_config.dir,
        ).strip()

    merge_base = check_output(
        ["git", "merge-base", base_llvm_sha, sha],
        cwd=llvm_config.dir,
    ).strip()

    if merge_base == base_llvm_sha:
        count = count_first_parent_commits(base_llvm_sha, sha)
        return Rev(branch=MAIN_BRANCH, number=count + base_llvm_revision)

    # Otherwise, either:
    # - |merge_base| is |sha| (we have a guaranteed llvm-svn number on |sha|)
    # - |merge_base| is neither (we have a guaranteed llvm-svn number on
    #                            |merge_base|, but not |sha|)
    merge_base_number = translate_prebase_sha_to_rev_number(
        llvm_config, merge_base
    )
    if merge_base == sha:
        return Rev(branch=MAIN_BRANCH, number=merge_base_number)

    revision_number = merge_base_number + count_first_parent_commits(
        merge_base, sha
    )
    branches_containing = check_output(
        ["git", "branch", "-r", "--contains", sha],
        cwd=llvm_config.dir,
    )

    prefix = llvm_config.remote + "/"
    candidates = []
    for branch in branches_containing.splitlines():
        branch = branch.strip()
        # Lines such as `origin/HEAD -> origin/main` describe a symbolic ref
        # that aliases another branch; they aren't branches of their own.
        if " -> " in branch:
            continue
        if branch.startswith(prefix):
            candidates.append(branch[len(prefix) :])

    if not candidates:
        raise ValueError(
            f"No viable branches found from {llvm_config.remote} with {sha}"
        )

    # It seems that some `origin/release/.*` branches have
    # `origin/upstream/release/.*` equivalents, which is... awkward to deal with.
    # Prefer the latter, since that seems to have newer commits than the former.
    # Technically n^2, but len(elements) should be like, tens in the worst case.
    candidates = [x for x in candidates if f"upstream/{x}" not in candidates]
    if len(candidates) != 1:
        raise ValueError(
            f"Ambiguity: multiple branches from {llvm_config.remote} have {sha}: "
            f"{sorted(candidates)}"
        )

    return Rev(branch=candidates[0], number=revision_number)
205
206
def parse_git_commit_messages(
    stream: t.Iterable[str], separator: str
) -> t.Iterable[t.Tuple[str, str]]:
    """Splits a stream of git log lines into (sha, message) pairs.

    The stream is expected to consist of zero or more entries shaped like:

    40 character sha
    commit
    message
    body
    separator

    For each entry, yields the stripped SHA line together with the message
    lines (line endings kept) joined into one string. The separator line is
    not part of the message. If the stream ends before a separator is seen,
    the lines collected so far are yielded as the final message.
    """
    remaining = iter(stream)

    # Both loops pull from the same iterator: the outer one consumes the SHA
    # line of each entry, the inner one consumes that entry's message.
    for sha_line in remaining:
        sha = sha_line.strip()
        assert is_git_sha(sha), f"Invalid git SHA: {sha}"

        body_lines = []
        for body_line in remaining:
            if body_line.strip() == separator:
                break
            body_lines.append(body_line)

        yield sha, "".join(body_lines)
244
245
def translate_prebase_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
    """Finds the SHA of the commit whose message ends in rev's svn number.

    This function assumes that the given rev refers to a commit that's an
    ancestor of |base_llvm_sha|; only the history of |base_llvm_sha| is
    searched, and only |rev.number| is used for the lookup.

    Raises a subprocess.CalledProcessError if git exits non-zero without a
    match having been found, and a ValueError if there is simply no match.
    """
    wanted_trailer = f"llvm-svn: {rev.number}"
    # Marks the end of each log entry in the custom --format below.
    entry_terminator = ">!" * 80

    log_command = [
        "git",
        "log",
        "--grep",
        f"^{wanted_trailer}$",
        f"--format=%H%n%B{entry_terminator}",
        base_llvm_sha,
    ]

    with subprocess.Popen(
        log_command,
        cwd=llvm_config.dir,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        encoding="utf-8",
    ) as git_log:
        entries = parse_git_commit_messages(git_log.stdout, entry_terminator)
        for sha, message in entries:
            # --grep matches the trailer anywhere in the message, and reverts
            # may quote the message they revert, so taking the first hit
            # isn't enough: insist that the trailer is the very last line.
            if message.splitlines()[-1].strip() == wanted_trailer:
                git_log.terminate()
                return sha

    if git_log.returncode:
        raise subprocess.CalledProcessError(git_log.returncode, log_command)
    raise ValueError(f"No commit with revision {rev} found")
284
285
def translate_rev_to_sha(llvm_config: LLVMConfig, rev: Rev) -> str:
    """Translates a Rev to a SHA.

    Main-branch revs numbered below |base_llvm_revision| are looked up by
    their llvm-svn commit-message trailer. Every other rev is located by
    counting first-parent commits between a base commit and the head of
    |llvm_config.remote|/<branch>. The base is |base_llvm_sha| for main, and
    the merge base of |base_llvm_sha| and the branch head otherwise.

    Raises a ValueError if the given Rev doesn't exist in the given config.
    """
    branch, number = rev

    if branch == MAIN_BRANCH:
        if number < base_llvm_revision:
            return translate_prebase_rev_to_sha(llvm_config, rev)
        base_sha = base_llvm_sha
        base_revision_number = base_llvm_revision
    else:
        base_sha = check_output(
            [
                "git",
                "merge-base",
                base_llvm_sha,
                f"{llvm_config.remote}/{branch}",
            ],
            cwd=llvm_config.dir,
        )
        base_sha = base_sha.strip()
        if base_sha == base_llvm_sha:
            base_revision_number = base_llvm_revision
        else:
            base_revision_number = translate_prebase_sha_to_rev_number(
                llvm_config, base_sha
            )

    commit_number = number - base_revision_number
    if commit_number < 0:
        # Numbers on a branch count commits made after its base. A smaller
        # number would make the walk below run past the base and return
        # some unrelated commit.
        raise ValueError(
            f"Revision {rev} predates the base of "
            f"{llvm_config.remote}/{branch} (r{base_revision_number})"
        )

    # Alternatively, we could |git log --format=%H|, but git is *super* fast
    # about rev walking/counting locally compared to long |log|s, so we walk back
    # twice.
    head = check_output(
        ["git", "rev-parse", f"{llvm_config.remote}/{branch}"],
        cwd=llvm_config.dir,
    )
    branch_head_sha = head.strip()

    revs_between_str = check_output(
        [
            "git",
            "rev-list",
            "--count",
            "--first-parent",
            f"{base_sha}..{branch_head_sha}",
        ],
        cwd=llvm_config.dir,
    )
    revs_between = int(revs_between_str.strip())

    commits_behind_head = revs_between - commit_number
    if commits_behind_head < 0:
        raise ValueError(
            f"Revision {rev} is past {llvm_config.remote}/{branch}. Try updating "
            "your tree?"
        )

    result = check_output(
        ["git", "rev-parse", f"{branch_head_sha}~{commits_behind_head}"],
        cwd=llvm_config.dir,
    )

    return result.strip()
351
352
def find_root_llvm_dir(root_dir: str = ".") -> str:
    """Returns the top-level directory of the git checkout at |root_dir|.

    The answer comes from `git rev-parse --show-toplevel`, run with
    |root_dir| as its working directory.

    Raises a subprocess.CalledProcessError if no git directory is found.
    """
    show_toplevel = ["git", "rev-parse", "--show-toplevel"]
    return check_output(show_toplevel, cwd=root_dir).strip()
363
364
def main(argv: t.List[str]) -> None:
    """Command-line entry point: converts a SHA to a rev, or a rev to a SHA.

    |argv| holds the command-line arguments without the program name.
    Exactly one of --sha or --rev must be given; the translation is printed
    to stdout. Exits through the argument parser if the arguments are
    invalid or no LLVM tree can be autodetected.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--llvm_dir",
        help="LLVM directory to consult for git history, etc. Autodetected "
        "if cwd is inside of an LLVM tree",
    )
    parser.add_argument(
        "--upstream",
        default="origin",
        help="LLVM upstream's remote name. Defaults to %(default)s.",
    )
    sha_or_rev = parser.add_mutually_exclusive_group(required=True)
    sha_or_rev.add_argument(
        "--sha", help="A git SHA (or ref) to convert to a rev"
    )
    sha_or_rev.add_argument("--rev", help="A rev to convert into a sha")
    opts = parser.parse_args(argv)

    llvm_dir = opts.llvm_dir
    # An empty --llvm_dir is treated the same as an omitted one.
    if not llvm_dir:
        try:
            llvm_dir = find_root_llvm_dir()
        except subprocess.CalledProcessError:
            # parser.error() prints the message and exits.
            parser.error(
                "Couldn't autodetect an LLVM tree; please use --llvm_dir"
            )

    # Reuse the directory resolved above rather than autodetecting a second
    # time outside of the error handling.
    config = LLVMConfig(remote=opts.upstream, dir=llvm_dir)

    if opts.sha:
        rev = translate_sha_to_rev(config, opts.sha)
        print(rev)
    else:
        sha = translate_rev_to_sha(config, Rev.parse(opts.rev))
        print(sha)
404
405
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
408