# Copyright 2018 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import argparse
import base64
import csv
import hashlib
import io
import os
import re
import stat
import sys
import zipfile
from pathlib import Path

# Timestamp stamped on every archive member so that the output does not
# depend on when the wheel was built.
_ZIP_EPOCH = (1980, 1, 1, 0, 0, 0)


def commonpath(path1, path2):
    """Return the leading path segments shared by `path1` and `path2`.

    Both paths are split on `os.path.sep` and compared segment by segment;
    the shared leading segments are joined back with `os.path.sep`. An empty
    string is returned when the first segments already differ.
    """
    ret = []
    for a, b in zip(path1.split(os.path.sep), path2.split(os.path.sep)):
        if a != b:
            break
        ret.append(a)
    return os.path.sep.join(ret)


def escape_filename_segment(segment):
    """Escapes a filename segment per https://www.python.org/dev/peps/pep-0427/#escaping-and-unicode

    This is a legacy function, kept for backwards compatibility,
    and may be removed in the future. See `escape_filename_distribution_name`
    and `normalize_pep440` for the modern alternatives.
    """
    # `flags` must be passed by keyword: the fourth positional parameter of
    # `re.sub` is `count`, so a positional `re.UNICODE` (== 32) would cap the
    # number of replacements at 32 instead of setting the flag.
    return re.sub(r"[^\w\d.]+", "_", segment, flags=re.UNICODE)


def normalize_package_name(name):
    """Normalize a package name according to the Python Packaging User Guide.

    See https://packaging.python.org/en/latest/specifications/name-normalization/
    """
    return re.sub(r"[-_.]+", "-", name).lower()


def escape_filename_distribution_name(name):
    """Escape the distribution name component of a filename.

    See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode
    """
    return normalize_package_name(name).replace("-", "_")


def normalize_pep440(version):
    """Normalize version according to PEP 440, with fallback for placeholders.

    If there's a placeholder in braces, such as {BUILD_TIMESTAMP},
    replace it with 0. Such placeholders can be used with stamping, in
    which case they would have been resolved already by now; if they
    haven't, we're doing an unstamped build, but we still need to
    produce a valid version. If such replacements are made, the
    original version string, sanitized to dot-separated alphanumerics,
    is appended as a local version segment, so you understand what
    placeholder was involved.

    If that still doesn't produce a valid version, use version 0 and
    append the original version string, sanitized to dot-separated
    alphanumerics, as a local version segment.

    """

    # Imported lazily so that importing this module does not require
    # `packaging` to be installed.
    import packaging.version

    try:
        return str(packaging.version.Version(version))
    except packaging.version.InvalidVersion:
        pass

    sanitized = re.sub(r"[^a-z0-9]+", ".", version.lower()).strip(".")
    substituted = re.sub(r"\{\w+\}", "0", version)
    # If the version already has a local segment ("+..."), extend it with "."
    # rather than starting a second one.
    delimiter = "." if "+" in substituted else "+"
    try:
        return str(packaging.version.Version(f"{substituted}{delimiter}{sanitized}"))
    except packaging.version.InvalidVersion:
        return str(packaging.version.Version(f"0+{sanitized}"))


class _WhlFile(zipfile.ZipFile):
    """A `ZipFile` that tracks the RECORD entries of everything added to it.

    Files are added through `add_file` / `add_string`, which write the archive
    member with deterministic metadata and remember its sha256 digest and size
    so that `add_recordfile` can emit the RECORD file at the end.
    """

    def __init__(
        self,
        filename,
        *,
        mode,
        distribution_prefix: str,
        strip_path_prefixes=None,
        compression=zipfile.ZIP_DEFLATED,
        **kwargs,
    ):
        """Open the archive.

        Args:
            filename: Passed through to `zipfile.ZipFile`.
            mode: Passed through to `zipfile.ZipFile`.
            distribution_prefix: Prefix of the `.dist-info` and `.data`
                directory names inside the archive.
            strip_path_prefixes: Optional list of prefixes; the first one that
                matches an archive name is removed from it.
            compression: Passed through to `zipfile.ZipFile`.
            **kwargs: Passed through to `zipfile.ZipFile`.
        """
        self._distribution_prefix = distribution_prefix

        self._strip_path_prefixes = strip_path_prefixes or []
        # Entries for the RECORD file as (filename, hash, size) tuples.
        self._record = []

        super().__init__(filename, mode=mode, compression=compression, **kwargs)

    def distinfo_path(self, basename):
        """Return the archive path of `basename` inside the .dist-info directory."""
        return f"{self._distribution_prefix}.dist-info/{basename}"

    def data_path(self, basename):
        """Return the archive path of `basename` inside the .data directory."""
        return f"{self._distribution_prefix}.data/{basename}"

    def add_file(self, package_filename, real_filename):
        """Add given file to the distribution.

        If `real_filename` is a directory, its entries are added recursively
        under `package_filename`.
        """

        def arcname_from(name):
            # Always use unix path separators.
            normalized_arcname = name.replace(os.path.sep, "/")
            # Don't manipulate filenames in the .dist-info or .data directories.
            if normalized_arcname.startswith(self._distribution_prefix):
                return normalized_arcname
            for prefix in self._strip_path_prefixes:
                if normalized_arcname.startswith(prefix):
                    return normalized_arcname[len(prefix) :]

            return normalized_arcname

        if os.path.isdir(real_filename):
            # os.listdir() returns entries in arbitrary order; sort them so the
            # archive layout is reproducible.
            for entry in sorted(os.listdir(real_filename)):
                self.add_file(
                    "{}/{}".format(package_filename, entry),
                    "{}/{}".format(real_filename, entry),
                )
            return

        arcname = arcname_from(package_filename)
        zinfo = self._zipinfo(arcname)

        # Write file to the zip archive while computing the hash and length
        hasher = hashlib.sha256()
        size = 0
        with open(real_filename, "rb") as fsrc, self.open(zinfo, "w") as fdst:
            # Copy in 1 MiB blocks until read() returns an empty bytes object.
            for block in iter(lambda: fsrc.read(2**20), b""):
                fdst.write(block)
                hasher.update(block)
                size += len(block)

        self._add_to_record(arcname, self._serialize_digest(hasher), size)

    def add_string(self, filename, contents):
        """Add given 'contents' as filename to the distribution."""
        if isinstance(contents, str):
            contents = contents.encode("utf-8", "surrogateescape")
        zinfo = self._zipinfo(filename)
        self.writestr(zinfo, contents)
        hasher = hashlib.sha256()
        hasher.update(contents)
        self._add_to_record(filename, self._serialize_digest(hasher), len(contents))

    def _serialize_digest(self, hash):
        """Return the RECORD representation (bytes) of a sha256 hash object."""
        # https://www.python.org/dev/peps/pep-0376/#record
        # "base64.urlsafe_b64encode(digest) with trailing = removed"
        digest = base64.urlsafe_b64encode(hash.digest())
        digest = b"sha256=" + digest.rstrip(b"=")
        return digest

    def _add_to_record(self, filename, hash, size):
        """Remember one RECORD row; `size` is stored as ASCII bytes."""
        size = str(size).encode("ascii")
        self._record.append((filename, hash, size))

    def _zipinfo(self, filename):
        """Construct deterministic ZipInfo entry for a file named filename"""
        # Strip leading path separators to mirror ZipInfo.from_file behavior
        separators = os.path.sep
        if os.path.altsep is not None:
            separators += os.path.altsep
        arcname = filename.lstrip(separators)

        zinfo = zipfile.ZipInfo(filename=arcname, date_time=_ZIP_EPOCH)
        zinfo.create_system = 3  # ZipInfo entry created on a unix-y system
        # Both pip and installer expect the regular file bit to be set in order for the
        # executable bit to be preserved after extraction
        # https://github.com/pypa/pip/blob/23.3.2/src/pip/_internal/utils/unpacking.py#L96-L100
        # https://github.com/pypa/installer/blob/0.7.0/src/installer/sources.py#L310-L313
        zinfo.external_attr = (
            stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO | stat.S_IFREG
        ) << 16  # permissions: -rwxrwxrwx
        zinfo.compress_type = self.compression
        return zinfo

    def add_recordfile(self):
        """Write RECORD file to the distribution.

        Returns:
            The RECORD contents, encoded as UTF-8 bytes.
        """
        record_path = self.distinfo_path("RECORD")
        # RECORD lists itself with an empty hash and size.
        entries = self._record + [(record_path, b"", b"")]
        with io.StringIO() as contents_io:
            writer = csv.writer(contents_io, lineterminator="\n")
            for filename, digest, size in entries:
                if isinstance(filename, str):
                    filename = filename.lstrip("/")
                writer.writerow(
                    [
                        c
                        if isinstance(c, str)
                        else c.decode("utf-8", "surrogateescape")
                        for c in (filename, digest, size)
                    ]
                )

            contents = contents_io.getvalue()
        self.add_string(record_path, contents)
        return contents.encode("utf-8", "surrogateescape")


class WheelMaker:
    """Context manager that assembles a wheel archive.

    Entering the context opens the output archive; leaving it closes the
    archive. The `add_*` methods may only be used inside the context.
    """

    def __init__(
        self,
        name,
        version,
        build_tag,
        python_tag,
        abi,
        platform,
        compress,
        outfile=None,
        strip_path_prefixes=None,
    ):
        """Record the wheel parameters; no file is opened here.

        Args:
            name: Distribution name; escaped for use in file names.
            version: Version string; normalized with `normalize_pep440`.
            build_tag: Optional build tag; omitted from the name when falsy.
            python_tag: Python tag component of the wheel name.
            abi: ABI tag component of the wheel name.
            platform: Platform tag component of the wheel name.
            compress: Whether archive members are deflated or stored.
            outfile: Optional output path; defaults to `wheelname()`.
            strip_path_prefixes: Optional prefixes stripped from archive names.
        """
        self._name = name
        self._version = normalize_pep440(version)
        self._build_tag = build_tag
        self._python_tag = python_tag
        self._abi = abi
        self._platform = platform
        self._outfile = outfile
        self._strip_path_prefixes = strip_path_prefixes
        self._compress = compress
        self._wheelname_fragment_distribution_name = escape_filename_distribution_name(
            self._name
        )

        self._distribution_prefix = (
            self._wheelname_fragment_distribution_name + "-" + self._version
        )

        self._whlfile = None

    def __enter__(self):
        self._whlfile = _WhlFile(
            self.filename(),
            mode="w",
            distribution_prefix=self._distribution_prefix,
            strip_path_prefixes=self._strip_path_prefixes,
            compression=zipfile.ZIP_DEFLATED if self._compress else zipfile.ZIP_STORED,
        )
        return self

    def __exit__(self, type, value, traceback):
        self._whlfile.close()
        self._whlfile = None

    def wheelname(self) -> str:
        """Return the canonical wheel file name built from the name and tags."""
        components = [
            self._wheelname_fragment_distribution_name,
            self._version,
        ]
        if self._build_tag:
            components.append(self._build_tag)
        components += [self._python_tag, self._abi, self._platform]
        return "-".join(components) + ".whl"

    def filename(self) -> str:
        """Return the path the archive is written to."""
        if self._outfile:
            return self._outfile
        return self.wheelname()

    def disttags(self):
        """Return the list of compatibility tags written to the WHEEL file."""
        return ["-".join([self._python_tag, self._abi, self._platform])]

    def distinfo_path(self, basename):
        """Return the archive path of `basename` inside the .dist-info directory."""
        return self._whlfile.distinfo_path(basename)

    def data_path(self, basename):
        """Return the archive path of `basename` inside the .data directory."""
        return self._whlfile.data_path(basename)

    def add_file(self, package_filename, real_filename):
        """Add given file to the distribution."""
        self._whlfile.add_file(package_filename, real_filename)

    def add_wheelfile(self):
        """Write WHEEL file to the distribution"""
        # TODO(pstradomski): Support non-purelib wheels.
        wheel_contents = """\
Wheel-Version: 1.0
Generator: bazel-wheelmaker 1.0
Root-Is-Purelib: {}
""".format(
            "true" if self._platform == "any" else "false"
        )
        for tag in self.disttags():
            wheel_contents += f"Tag: {tag}\n"
        self._whlfile.add_string(self.distinfo_path("WHEEL"), wheel_contents)

    def add_metadata(self, metadata, name, description):
        """Write METADATA file to the distribution.

        Args:
            metadata: Metadata header text; any `Name:` line is rewritten to
                `name`, and a `Version:` line is appended.
            name: Distribution name to place in the `Name:` field.
            description: Body text appended after the headers; "UNKNOWN" is
                used when it is empty or None.
        """
        # https://www.python.org/dev/peps/pep-0566/
        # https://packaging.python.org/specifications/core-metadata/
        metadata = re.sub("^Name: .*$", "Name: %s" % name, metadata, flags=re.MULTILINE)
        metadata += "Version: %s\n\n" % self._version
        # setuptools seems to insert UNKNOWN as description when none is
        # provided.
        metadata += description if description else "UNKNOWN"
        metadata += "\n"
        self._whlfile.add_string(self.distinfo_path("METADATA"), metadata)

    def add_recordfile(self):
        """Write RECORD file to the distribution."""
        self._whlfile.add_recordfile()


def get_files_to_package(input_files):
    """Find files to be added to the distribution.

    input_files: list of pairs (package_path, real_path)

    Returns a dict mapping package_path to real_path; when a package_path
    appears more than once, the last pair wins.
    """
    files = {}
    for package_path, real_path in input_files:
        files[package_path] = real_path
    return files


def resolve_argument_stamp(
    argument: str, volatile_status_stamp: Path, stable_status_stamp: Path
) -> str:
    """Resolve workspace status stamps format strings found in the argument string

    Args:
        argument (str): The raw argument representation for the wheel (may include stamp variables)
        volatile_status_stamp (Path): The path to a volatile workspace status file
        stable_status_stamp (Path): The path to a stable workspace status file

    Returns:
        str: A resolved argument string
    """
    lines = (
        volatile_status_stamp.read_text().splitlines()
        + stable_status_stamp.read_text().splitlines()
    )
    for line in lines:
        if not line:
            continue
        # Each line is "KEY value"; a key without a value resolves to "".
        key, _, value = line.partition(" ")
        stamp = "{" + key + "}"
        argument = argument.replace(stamp, value)

    return argument


def parse_args() -> argparse.Namespace:
    """Parse the command line (`sys.argv[1:]`) of the wheel builder."""
    parser = argparse.ArgumentParser(description="Builds a python wheel")
    metadata_group = parser.add_argument_group("Wheel name, version and platform")
    metadata_group.add_argument(
        "--name", required=True, type=str, help="Name of the distribution"
    )
    metadata_group.add_argument(
        "--version", required=True, type=str, help="Version of the distribution"
    )
    metadata_group.add_argument(
        "--build_tag",
        type=str,
        default="",
        help="Optional build tag for the distribution",
    )
    metadata_group.add_argument(
        "--python_tag",
        type=str,
        default="py3",
        help="Python version, e.g. 'py2' or 'py3'",
    )
    metadata_group.add_argument("--abi", type=str, default="none")
    metadata_group.add_argument(
        "--platform", type=str, default="any", help="Target platform. "
    )

    output_group = parser.add_argument_group("Output file location")
    output_group.add_argument(
        "--out", type=str, default=None, help="Override name of output file"
    )
    output_group.add_argument(
        "--no_compress",
        action="store_true",
        help="Disable compression of the final archive",
    )
    output_group.add_argument(
        "--name_file",
        type=Path,
        help="A file where the canonical name of the " "wheel will be written",
    )

    output_group.add_argument(
        "--strip_path_prefix",
        type=str,
        action="append",
        default=[],
        help="Path prefix to be stripped from input package files' path. "
        "Can be supplied multiple times. Evaluated in order.",
    )

    wheel_group = parser.add_argument_group("Wheel metadata")
    wheel_group.add_argument(
        "--metadata_file",
        type=Path,
        help="Contents of the METADATA file (before appending contents of "
        "--description_file)",
    )
    wheel_group.add_argument(
        "--description_file", help="Path to the file with package description"
    )
    wheel_group.add_argument(
        "--description_content_type", help="Content type of the package description"
    )
    wheel_group.add_argument(
        "--entry_points_file",
        help="Path to a correctly-formatted entry_points.txt file",
    )

    contents_group = parser.add_argument_group("Wheel contents")
    contents_group.add_argument(
        "--input_file",
        action="append",
        help="'package_path;real_path' pairs listing "
        "files to be included in the wheel. "
        "Can be supplied multiple times.",
    )
    contents_group.add_argument(
        "--input_file_list",
        action="append",
        help="A file that has all the input files defined as a list to avoid "
        "the long command",
    )
    contents_group.add_argument(
        "--extra_distinfo_file",
        action="append",
        help="'filename;real_path' pairs listing extra files to include in "
        "dist-info directory. Can be supplied multiple times.",
    )
    contents_group.add_argument(
        "--data_files",
        action="append",
        help="'filename;real_path' pairs listing data files to include in "
        "data directory. Can be supplied multiple times.",
    )

    build_group = parser.add_argument_group("Building requirements")
    build_group.add_argument(
        "--volatile_status_file",
        type=Path,
        help="Pass in the stamp info file for stamping",
    )
    build_group.add_argument(
        "--stable_status_file",
        type=Path,
        help="Pass in the stamp info file for stamping",
    )

    return parser.parse_args(sys.argv[1:])


def _parse_file_pairs(content: list[str]) -> list[list[str]]:
    """
    Parse ; delimited lists of files into a 2D list.

    Each entry is split on the first ";" only. `None` or an empty list
    yields an empty list.
    """
    return [i.split(";", maxsplit=1) for i in content or []]


def main() -> None:
    """Build the wheel described by the command-line arguments."""
    arguments = parse_args()

    input_files = _parse_file_pairs(arguments.input_file)
    extra_distinfo_file = _parse_file_pairs(arguments.extra_distinfo_file)
    data_files = _parse_file_pairs(arguments.data_files)

    # `--input_file_list` uses action="append" without a default, so it is
    # None when the flag was never given.
    for input_file in arguments.input_file_list or []:
        with open(input_file) as _file:
            input_file_list = _file.read().splitlines()
        # Blank lines carry no pair; everything else is parsed exactly like
        # `--input_file` values.
        input_files.extend(_parse_file_pairs([line for line in input_file_list if line]))

    all_files = get_files_to_package(input_files)
    # Sort the files for reproducible order in the archive.
    all_files = sorted(all_files.items())

    strip_prefixes = list(arguments.strip_path_prefix)

    name = arguments.name
    version = arguments.version
    # Stamps are only resolved when both status files were supplied.
    if arguments.volatile_status_file and arguments.stable_status_file:
        name = resolve_argument_stamp(
            name,
            arguments.volatile_status_file,
            arguments.stable_status_file,
        )
        version = resolve_argument_stamp(
            version,
            arguments.volatile_status_file,
            arguments.stable_status_file,
        )

    with WheelMaker(
        name=name,
        version=version,
        build_tag=arguments.build_tag,
        python_tag=arguments.python_tag,
        abi=arguments.abi,
        platform=arguments.platform,
        outfile=arguments.out,
        strip_path_prefixes=strip_prefixes,
        compress=not arguments.no_compress,
    ) as maker:
        for package_filename, real_filename in all_files:
            maker.add_file(package_filename, real_filename)
        maker.add_wheelfile()

        description = None
        if arguments.description_file:
            with open(
                arguments.description_file, "rt", encoding="utf-8"
            ) as description_file:
                description = description_file.read()

        metadata = arguments.metadata_file.read_text(encoding="utf-8")

        # This is not imported at the top of the file due to the reliance
        # on this file in the `whl_library` repository rule which does not
        # provide `packaging` but does import symbols defined here.
        from packaging.requirements import Requirement

        # Search for any `Requires-Dist` entries that refer to other files and
        # expand them.

        def get_new_requirement_line(reqs_text, extra):
            req = Requirement(reqs_text.strip())
            if req.marker:
                if extra:
                    return f"Requires-Dist: {req.name}{req.specifier}; ({req.marker}) and {extra}"
                else:
                    return f"Requires-Dist: {req.name}{req.specifier}; {req.marker}"
            else:
                return f"Requires-Dist: {req.name}{req.specifier}; {extra}".strip(" ;")

        for meta_line in metadata.splitlines():
            if not meta_line.startswith("Requires-Dist: "):
                continue

            if not meta_line[len("Requires-Dist: ") :].startswith("@"):
                # This is a normal requirement.
                package, _, extra = meta_line[len("Requires-Dist: ") :].rpartition(";")
                if not package:
                    # This is when the package requirement does not have markers.
                    continue
                extra = extra.strip()
                metadata = metadata.replace(
                    meta_line, get_new_requirement_line(package, extra)
                )
                continue

            # This is a requirement that refers to a file.
            file, _, extra = meta_line[len("Requires-Dist: @") :].partition(";")
            extra = extra.strip()

            reqs = []
            for reqs_line in Path(file).read_text(encoding="utf-8").splitlines():
                reqs_text = reqs_line.strip()
                if not reqs_text or reqs_text.startswith(("#", "-")):
                    continue

                # Strip any comments
                reqs_text, _, _ = reqs_text.partition("#")

                reqs.append(get_new_requirement_line(reqs_text, extra))

            metadata = metadata.replace(meta_line, "\n".join(reqs))

        maker.add_metadata(
            metadata=metadata,
            name=name,
            description=description,
        )

        if arguments.entry_points_file:
            maker.add_file(
                maker.distinfo_path("entry_points.txt"), arguments.entry_points_file
            )

        # Sort the files for reproducible order in the archive.
        for filename, real_path in sorted(data_files):
            maker.add_file(maker.data_path(filename), real_path)
        for filename, real_path in sorted(extra_distinfo_file):
            maker.add_file(maker.distinfo_path(filename), real_path)

        maker.add_recordfile()

        # Since stamping may otherwise change the target name of the
        # wheel, the canonical name (with stamps resolved) is written
        # to a file so consumers of the wheel can easily determine
        # the correct name. `--name_file` is optional, so only write it
        # when it was requested.
        if arguments.name_file is not None:
            arguments.name_file.write_text(maker.wheelname())


if __name__ == "__main__":
    main()