• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2023 The Bazel Authors. All rights reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#    http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""
Regenerate a whl file after patching and clean up the patched contents.

This script takes the contents of the current directory, creates a new wheel
out of them, and removes all files that were written to the wheel.
20"""
21
22from __future__ import annotations
23
24import argparse
25import csv
26import difflib
27import logging
28import pathlib
29import sys
30import tempfile
31
32from tools.wheelmaker import _WhlFile
33
# NOTE: File names that _files_to_pack never returns when they are found
# directly inside a dist-info directory. This follows the rules for what goes
# into the RECORD:
# https://peps.python.org/pep-0491/#the-dist-info-directory
_EXCLUDES = [
    "RECORD",
    "INSTALLER",
    "RECORD.jws",
    "RECORD.p7s",
    "REQUESTED",
]

# A file is treated as dist-info metadata when the name of its parent
# directory ends with this suffix.
_DISTINFO = "dist-info"
45
46
47def _unidiff_output(expected, actual, record):
48    """
49    Helper function. Returns a string containing the unified diff of two
50    multiline strings.
51    """
52
53    expected = expected.splitlines(1)
54    actual = actual.splitlines(1)
55
56    diff = difflib.unified_diff(
57        expected, actual, fromfile=f"a/{record}", tofile=f"b/{record}"
58    )
59
60    return "".join(diff)
61
62
def _files_to_pack(dir: pathlib.Path, want_record: str) -> list[pathlib.Path]:
    """Return the files under ``dir`` in the order they should be packed.

    Files already listed in the RECORD keep their RECORD order; files found on
    disk but missing from the RECORD are appended in sorted order. Regular
    files always precede the files that live in a dist-info directory.

    Args:
        dir: Root directory that holds the unpacked wheel contents.
        want_record: Text of the existing RECORD file; may be empty.

    Returns:
        Paths (each rooted at ``dir``) ordered as: recorded files, extra
        files, recorded dist-info files, extra dist-info files. Dist-info
        files whose name is in ``_EXCLUDES`` are never returned.
    """

    # First get existing files by using the RECORD file
    got_files = []
    got_distinfos = []
    for row in csv.reader(want_record.splitlines()):
        if not row:
            # csv.reader yields an empty list for a blank line (e.g. a
            # trailing empty line in RECORD); there is no path to look at.
            continue

        path = dir / row[0]

        if not path.exists():
            # skip files that do not exist as they won't be present in the
            # final RECORD file.
            continue

        if not path.parent.name.endswith(_DISTINFO):
            got_files.append(path)
        elif path.name not in _EXCLUDES:
            got_distinfos.append(path)

    # Sets make the "already recorded?" checks below O(1) instead of scanning
    # the lists for every file found on disk.
    known_files = set(got_files)
    known_distinfos = set(got_distinfos)

    # Then get extra files present in the directory but not in the RECORD file
    extra_files = []
    extra_distinfos = []
    for path in dir.rglob("*"):
        if path.is_dir():
            continue

        if path.parent.name.endswith(_DISTINFO):
            # Excluded dist-info files are dropped here just like they are
            # when read from the RECORD above.
            if path.name not in _EXCLUDES and path not in known_distinfos:
                extra_distinfos.append(path)
        elif path not in known_files:
            extra_files.append(path)

    # sort the extra files for reproducibility
    extra_files.sort()
    extra_distinfos.sort()

    # This order ensures that the structure of the RECORD file is always the
    # same and ensures smaller patchsets to the RECORD file in general
    return got_files + extra_files + got_distinfos + extra_distinfos
108
109
def main(sys_argv):
    """Repack the patched wheel contents of the current directory.

    Everything in the current working directory except the original wheel is
    moved into a temporary directory created inside it, packed into the
    ``output`` wheel, and removed together with that temporary directory. The
    RECORD generated for the new wheel is compared with the RECORD found on
    disk; if they differ, a unified diff is logged and, when ``--record-patch``
    was given, also written to that path.

    Args:
        sys_argv: Command line arguments, without the program name.

    Raises:
        FileNotFoundError: If no ``*dist-info`` entry exists in the current
            directory.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "whl_path",
        type=pathlib.Path,
        help="The original wheel file that we have patched.",
    )
    parser.add_argument(
        "--record-patch",
        type=pathlib.Path,
        help="The output path that we are going to write the RECORD file patch to.",
    )
    parser.add_argument(
        "output",
        type=pathlib.Path,
        help="The output path that we are going to write a new file to.",
    )
    args = parser.parse_args(sys_argv)

    cwd = pathlib.Path.cwd()
    logging.debug("=" * 80)
    logging.debug("Repackaging the wheel")
    logging.debug("=" * 80)

    with tempfile.TemporaryDirectory(dir=cwd) as tmpdir:
        patched_wheel_dir = cwd / tmpdir
        logging.debug(f"Created a tmpdir: {patched_wheel_dir}")

        # cwd.glob() yields paths anchored at cwd, so a relative whl_path has
        # to be anchored the same way or it would never match and the original
        # wheel would be moved into (and packed from) the tmpdir.
        excludes = [cwd / args.whl_path, patched_wheel_dir]

        logging.debug("Moving whl contents to the newly created tmpdir")
        for p in cwd.glob("*"):
            if p in excludes:
                logging.debug(f"Ignoring: {p}")
                continue

            rel_path = p.relative_to(cwd)
            dst = p.rename(patched_wheel_dir / rel_path)
            logging.debug(f"mv {p} -> {dst}")

        distinfo_dir = next(iter(patched_wheel_dir.glob("*dist-info")), None)
        if distinfo_dir is None:
            # Fail with an actionable message instead of a bare StopIteration.
            raise FileNotFoundError(f"No *dist-info directory found in {cwd}")
        logging.debug(f"Found dist-info dir: {distinfo_dir}")

        record_path = distinfo_dir / "RECORD"
        # Decode exactly like the regenerated RECORD below so that the
        # comparison does not depend on the locale's default encoding.
        record_contents = (
            record_path.read_text(encoding="utf-8", errors="surrogateescape")
            if record_path.exists()
            else ""
        )
        # "<name>-<version>.dist-info" -> "<name>-<version>"
        distribution_prefix = distinfo_dir.with_suffix("").name

        with _WhlFile(
            args.output, mode="w", distribution_prefix=distribution_prefix
        ) as out:
            for p in _files_to_pack(patched_wheel_dir, record_contents):
                rel_path = p.relative_to(patched_wheel_dir)
                out.add_file(str(rel_path), p)

            logging.debug("Writing RECORD file")
            got_record = out.add_recordfile().decode("utf-8", "surrogateescape")

    if got_record == record_contents:
        logging.info(f"Created a whl file: {args.output}")
        return

    record_diff = _unidiff_output(
        record_contents,
        got_record,
        out.distinfo_path("RECORD"),
    )

    if args.record_patch is None:
        # --record-patch is optional; without it the diff is only logged.
        logging.warning(f"Please apply patch to the RECORD file:\n{record_diff}")
        return

    args.record_patch.write_text(
        record_diff, encoding="utf-8", errors="surrogateescape"
    )
    logging.warning(
        f"Please apply patch to the RECORD file ({args.record_patch}):\n{record_diff}"
    )
179
180
if __name__ == "__main__":
    # Log at DEBUG level so every step of the repack is reported.
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(module)s: %(levelname)s: %(message)s",
    )

    # The program name is dropped; main() only receives the real arguments.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
187