# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Regenerate a whl file after patching and cleanup the patched contents.

This script will take contents of the current directory and create a new wheel
out of it and will remove all files that were written to the wheel.
"""

from __future__ import annotations

import argparse
import csv
import difflib
import logging
import pathlib
import sys
import tempfile

from tools.wheelmaker import _WhlFile

# NOTE: These dist-info files are never listed in (or packed via) the RECORD
# handling below. This implements the matching described in
# https://peps.python.org/pep-0491/#the-dist-info-directory
_EXCLUDES = [
    "RECORD",
    "INSTALLER",
    "RECORD.jws",
    "RECORD.p7s",
    "REQUESTED",
]

# Suffix of the directory name that holds the wheel metadata.
_DISTINFO = "dist-info"


def _unidiff_output(expected, actual, record):
    """Return the unified diff of two multiline strings.

    Args:
        expected: The original text (the "a/" side of the diff).
        actual: The new text (the "b/" side of the diff).
        record: The path used to label both sides of the diff header.

    Returns:
        The unified diff as a single string; empty if the inputs are equal.
    """
    diff = difflib.unified_diff(
        expected.splitlines(keepends=True),
        actual.splitlines(keepends=True),
        fromfile=f"a/{record}",
        tofile=f"b/{record}",
    )

    return "".join(diff)


def _files_to_pack(dir: pathlib.Path, want_record: str) -> list[pathlib.Path]:
    """Return the files under `dir` to pack, in a stable RECORD-friendly order.

    Files that are listed in the existing RECORD come first, in RECORD order,
    followed by files found on disk that the RECORD does not mention (sorted).
    Within each of those groups, regular files precede dist-info files.

    Args:
        dir: The directory holding the unpacked (and patched) wheel contents.
        want_record: The text of the existing RECORD file; may be empty.

    Returns:
        The paths (each prefixed with `dir`) that should go into the wheel.
        Files named in `_EXCLUDES` that live in a dist-info directory are
        never returned.
    """
    # First get existing files by using the RECORD file
    got_files: list[pathlib.Path] = []
    got_distinfos: list[pathlib.Path] = []
    # Everything already collected, for O(1) membership checks below.
    seen: set[pathlib.Path] = set()
    for row in csv.reader(want_record.splitlines()):
        if not row:
            # csv.reader yields an empty list for blank lines.
            continue

        path = dir / row[0]
        if path in seen:
            # A duplicate RECORD entry must not pack the same file twice.
            continue

        if not path.exists():
            # skip files that do not exist as they won't be present in the
            # final RECORD file.
            continue

        if not path.parent.name.endswith(_DISTINFO):
            got_files.append(path)
            seen.add(path)
        elif path.name not in _EXCLUDES:
            got_distinfos.append(path)
            seen.add(path)

    # Then get extra files present in the directory but not in the RECORD file
    extra_files: list[pathlib.Path] = []
    extra_distinfos: list[pathlib.Path] = []
    for path in dir.rglob("*"):
        if path.is_dir():
            continue

        if path.parent.name.endswith(_DISTINFO):
            # NOTE: we implement the following matching of what goes into
            # the RECORD
            # https://peps.python.org/pep-0491/#the-dist-info-directory
            if path.name not in _EXCLUDES and path not in seen:
                extra_distinfos.append(path)
        elif path not in seen:
            extra_files.append(path)

    # sort the extra files for reproducibility
    extra_files.sort()
    extra_distinfos.sort()

    # This order ensures that the structure of the RECORD file is always the
    # same and ensures smaller patchsets to the RECORD file in general
    return got_files + extra_files + got_distinfos + extra_distinfos


def main(sys_argv):
    """Repack the current directory into a wheel and report RECORD changes.

    Everything in the current working directory except the original wheel is
    moved into a temporary directory, packed into the output wheel and then
    removed together with the temporary directory. If the regenerated RECORD
    differs from the existing one, a unified diff is logged and, when
    `--record-patch` is given, written to that path.

    Args:
        sys_argv: The command line arguments, without the program name.

    Raises:
        FileNotFoundError: If no `*dist-info` directory is found at the top
            level of the current working directory.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "whl_path",
        type=pathlib.Path,
        help="The original wheel file that we have patched.",
    )
    parser.add_argument(
        "--record-patch",
        type=pathlib.Path,
        help="The output path that we are going to write the RECORD file patch to.",
    )
    parser.add_argument(
        "output",
        type=pathlib.Path,
        help="The output path that we are going to write a new file to.",
    )
    args = parser.parse_args(sys_argv)

    cwd = pathlib.Path.cwd()
    logging.debug("=" * 80)
    logging.debug("Repackaging the wheel")
    logging.debug("=" * 80)

    with tempfile.TemporaryDirectory(dir=cwd) as tmpdir:
        patched_wheel_dir = cwd / tmpdir
        logging.debug("Created a tmpdir: %s", patched_wheel_dir)

        # `cwd.glob` yields absolute paths, so anchor a relative `whl_path`
        # at cwd; otherwise the original wheel would never be recognised and
        # would get moved, packed and deleted with everything else.
        excludes = {cwd / args.whl_path, patched_wheel_dir}

        logging.debug("Moving whl contents to the newly created tmpdir")
        for p in cwd.glob("*"):
            if p in excludes:
                logging.debug("Ignoring: %s", p)
                continue

            rel_path = p.relative_to(cwd)
            dst = p.rename(patched_wheel_dir / rel_path)
            logging.debug("mv %s -> %s", p, dst)

        distinfo_dir = next(patched_wheel_dir.glob("*dist-info"), None)
        if distinfo_dir is None:
            raise FileNotFoundError(
                f"Could not find a '*dist-info' directory in {cwd}"
            )
        logging.debug("Found dist-info dir: %s", distinfo_dir)

        record_path = distinfo_dir / "RECORD"
        # Decode the same way the regenerated RECORD is decoded below so the
        # comparison does not depend on the locale's default encoding.
        record_contents = (
            record_path.read_text(encoding="utf-8", errors="surrogateescape")
            if record_path.exists()
            else ""
        )
        # "foo-1.0.dist-info" -> "foo-1.0"
        distribution_prefix = distinfo_dir.with_suffix("").name

        with _WhlFile(
            args.output, mode="w", distribution_prefix=distribution_prefix
        ) as out:
            for p in _files_to_pack(patched_wheel_dir, record_contents):
                rel_path = p.relative_to(patched_wheel_dir)
                out.add_file(str(rel_path), p)

            logging.debug("Writing RECORD file")
            got_record = out.add_recordfile().decode("utf-8", "surrogateescape")

    if got_record == record_contents:
        logging.info("Created a whl file: %s", args.output)
        return

    record_diff = _unidiff_output(
        record_contents,
        got_record,
        out.distinfo_path("RECORD"),
    )

    if args.record_patch is None:
        # `--record-patch` is optional; without it only report the diff.
        logging.warning("Please apply patch to the RECORD file:\n%s", record_diff)
        return

    args.record_patch.write_text(
        record_diff, encoding="utf-8", errors="surrogateescape"
    )
    logging.warning(
        "Please apply patch to the RECORD file (%s):\n%s",
        args.record_patch,
        record_diff,
    )


if __name__ == "__main__":
    logging.basicConfig(
        format="%(module)s: %(levelname)s: %(message)s", level=logging.DEBUG
    )

    sys.exit(main(sys.argv[1:]))