• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2018 The Bazel Authors. All rights reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#    http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from __future__ import annotations
16
17import argparse
18import base64
19import csv
20import hashlib
21import io
22import os
23import re
24import stat
25import sys
26import zipfile
27from pathlib import Path
28
# 1980-01-01 00:00:00, used as the date_time of every ZipInfo built by
# _WhlFile._zipinfo so that archive entries carry a fixed timestamp.
_ZIP_EPOCH = (1980, 1, 1, 0, 0, 0)
30
31
def commonpath(path1, path2):
    """Return the leading path components shared by both paths.

    Each path is split on ``os.path.sep`` and compared component by
    component from the left. The components that match are joined back
    together with ``os.path.sep``; an empty string is returned when the
    first components already differ.
    """
    sep = os.path.sep
    left = path1.split(sep)
    right = path2.split(sep)

    # Count how many leading components agree.
    shared = 0
    limit = min(len(left), len(right))
    while shared < limit and left[shared] == right[shared]:
        shared += 1

    return sep.join(left[:shared])
39
40
def escape_filename_segment(segment):
    """Escapes a filename segment per https://www.python.org/dev/peps/pep-0427/#escaping-and-unicode

    Every run of characters other than word characters, digits and "." is
    replaced with a single underscore.

    This is a legacy function, kept for backwards compatibility,
    and may be removed in the future. See `escape_filename_distribution_name`
    and `normalize_pep440` for the modern alternatives.
    """
    # The flag must be passed by keyword: the fourth positional parameter of
    # re.sub() is `count`, so a positional re.UNICODE (== 32) would silently
    # cap the number of replacements at 32 instead of setting a flag.
    return re.sub(r"[^\w\d.]+", "_", segment, flags=re.UNICODE)
49
50
def normalize_package_name(name):
    """Return the normalized form of a package name.

    Runs of "-", "_" and "." collapse to a single "-" and the result is
    lowercased, as described in
    https://packaging.python.org/en/latest/specifications/name-normalization/
    """
    # Splitting on the separator runs and re-joining with "-" collapses each
    # run to exactly one dash.
    pieces = re.split(r"[-_.]+", name)
    return "-".join(pieces).lower()
57
58
def escape_filename_distribution_name(name):
    """Return the distribution name as it should appear in a wheel filename.

    The name is normalized with `normalize_package_name` and every "-" in
    the result is then turned into "_".

    See https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode
    """
    normalized = normalize_package_name(name)
    return "_".join(normalized.split("-"))
65
66
def normalize_pep440(version):
    """Return a PEP 440 normalized version, tolerating stamp placeholders.

    A version that already parses is returned in its normalized form.

    Otherwise every placeholder in braces, such as {BUILD_TIMESTAMP}, is
    replaced with 0. Placeholders are used with stamping and would already
    be resolved in a stamped build; when they are still present the build
    is unstamped, yet a valid version is still required. The original
    version string, sanitized to dot-separated alphanumerics, is appended
    as a local version segment so the placeholder involved stays visible.

    If that still is not a valid version, version 0 is used with the same
    sanitized string as its local version segment.
    """
    # Imported here rather than at module level, as in the rest of this file.
    import packaging.version

    Version = packaging.version.Version
    InvalidVersion = packaging.version.InvalidVersion

    try:
        return str(Version(version))
    except InvalidVersion:
        pass

    local_segment = re.sub(r"[^a-z0-9]+", ".", version.lower()).strip(".")
    zeroed = re.sub(r"\{\w+\}", "0", version)
    # A version that already has a local segment ("+...") gets the sanitized
    # text appended to it; otherwise a new local segment is started.
    joiner = "+" if "+" not in zeroed else "."
    try:
        candidate = Version(f"{zeroed}{joiner}{local_segment}")
    except InvalidVersion:
        candidate = Version(f"0+{local_segment}")
    return str(candidate)
99
100
class _WhlFile(zipfile.ZipFile):
    """A `zipfile.ZipFile` that records what it writes for the wheel RECORD.

    Entries are created through `_zipinfo`, which fixes the timestamp and
    permission bits, and every added entry is remembered as a
    (filename, digest, size) tuple so `add_recordfile` can write RECORD.
    """

    def __init__(
        self,
        filename,
        *,
        mode,
        distribution_prefix: str,
        strip_path_prefixes=None,
        compression=zipfile.ZIP_DEFLATED,
        **kwargs,
    ):
        """Open the archive and initialise the RECORD bookkeeping.

        Args:
            filename: Archive to open; forwarded to `zipfile.ZipFile`.
            mode: Open mode; forwarded to `zipfile.ZipFile`.
            distribution_prefix: Stem of the `.dist-info` and `.data`
                directory names returned by `distinfo_path` and `data_path`.
            strip_path_prefixes: Prefixes removed from archive names by
                `add_file`; the first matching prefix wins. Defaults to no
                prefixes.
            compression: Compression method; forwarded to `zipfile.ZipFile`
                and applied to every entry.
            **kwargs: Forwarded to `zipfile.ZipFile`.
        """
        self._distribution_prefix = distribution_prefix

        self._strip_path_prefixes = strip_path_prefixes or []
        # Entries for the RECORD file as (filename, hash, size) tuples.
        self._record = []

        super().__init__(filename, mode=mode, compression=compression, **kwargs)

    def distinfo_path(self, basename):
        """Return the archive path of `basename` inside the .dist-info directory."""
        return f"{self._distribution_prefix}.dist-info/{basename}"

    def data_path(self, basename):
        """Return the archive path of `basename` inside the .data directory."""
        return f"{self._distribution_prefix}.data/{basename}"

    def add_file(self, package_filename, real_filename):
        """Add given file to the distribution.

        Args:
            package_filename: Path the file should have inside the archive,
                before prefix stripping.
            real_filename: Path of the file on disk. If it is a directory,
                its contents are added recursively in sorted order.
        """

        def arcname_from(name):
            # Always use unix path separators.
            normalized_arcname = name.replace(os.path.sep, "/")
            # Don't manipulate filenames in the .dist-info or .data directories.
            if normalized_arcname.startswith(self._distribution_prefix):
                return normalized_arcname
            for prefix in self._strip_path_prefixes:
                if normalized_arcname.startswith(prefix):
                    return normalized_arcname[len(prefix) :]

            return normalized_arcname

        if os.path.isdir(real_filename):
            # os.listdir() returns entries in arbitrary order; sort them so
            # the archive and RECORD layout is reproducible.
            for file_ in sorted(os.listdir(real_filename)):
                self.add_file(
                    "{}/{}".format(package_filename, file_),
                    "{}/{}".format(real_filename, file_),
                )
            return

        arcname = arcname_from(package_filename)
        zinfo = self._zipinfo(arcname)

        # Write file to the zip archive while computing the hash and length
        hasher = hashlib.sha256()
        size = 0
        with open(real_filename, "rb") as fsrc:
            with self.open(zinfo, "w") as fdst:
                while True:
                    block = fsrc.read(2**20)
                    if not block:
                        break
                    fdst.write(block)
                    hasher.update(block)
                    size += len(block)

        self._add_to_record(arcname, self._serialize_digest(hasher), size)

    def add_string(self, filename, contents):
        """Add given 'contents' as filename to the distribution.

        `contents` may be bytes or str; a str is encoded as UTF-8 with the
        "surrogateescape" error handler before being written.
        """
        if isinstance(contents, str):
            contents = contents.encode("utf-8", "surrogateescape")
        zinfo = self._zipinfo(filename)
        self.writestr(zinfo, contents)
        hasher = hashlib.sha256()
        hasher.update(contents)
        self._add_to_record(filename, self._serialize_digest(hasher), len(contents))

    def _serialize_digest(self, hash):
        """Return the RECORD representation (bytes) of a sha256 hash object."""
        # https://www.python.org/dev/peps/pep-0376/#record
        # "base64.urlsafe_b64encode(digest) with trailing = removed"
        digest = base64.urlsafe_b64encode(hash.digest())
        digest = b"sha256=" + digest.rstrip(b"=")
        return digest

    def _add_to_record(self, filename, hash, size):
        """Remember one written entry for the RECORD file."""
        size = str(size).encode("ascii")
        self._record.append((filename, hash, size))

    def _zipinfo(self, filename):
        """Construct deterministic ZipInfo entry for a file named filename"""
        # Strip leading path separators to mirror ZipInfo.from_file behavior
        separators = os.path.sep
        if os.path.altsep is not None:
            separators += os.path.altsep
        arcname = filename.lstrip(separators)

        zinfo = zipfile.ZipInfo(filename=arcname, date_time=_ZIP_EPOCH)
        zinfo.create_system = 3  # ZipInfo entry created on a unix-y system
        # Both pip and installer expect the regular file bit to be set in order for the
        # executable bit to be preserved after extraction
        # https://github.com/pypa/pip/blob/23.3.2/src/pip/_internal/utils/unpacking.py#L96-L100
        # https://github.com/pypa/installer/blob/0.7.0/src/installer/sources.py#L310-L313
        zinfo.external_attr = (
            stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO | stat.S_IFREG
        ) << 16  # permissions: -rwxrwxrwx
        zinfo.compress_type = self.compression
        return zinfo

    def add_recordfile(self):
        """Write RECORD file to the distribution.

        Returns:
            The RECORD contents that were written, as UTF-8 encoded bytes.
        """
        record_path = self.distinfo_path("RECORD")
        # RECORD lists itself with an empty digest and size.
        entries = self._record + [(record_path, b"", b"")]
        with io.StringIO() as contents_io:
            writer = csv.writer(contents_io, lineterminator="\n")
            for filename, digest, size in entries:
                if isinstance(filename, str):
                    filename = filename.lstrip("/")
                # Digests and sizes are stored as bytes; decode them so the
                # csv writer receives text.
                writer.writerow(
                    (
                        c
                        if isinstance(c, str)
                        else c.decode("utf-8", "surrogateescape")
                        for c in (filename, digest, size)
                    )
                )

            contents = contents_io.getvalue()
            self.add_string(record_path, contents)
            return contents.encode("utf-8", "surrogateescape")
230
231
class WheelMaker:
    """Context manager that assembles a wheel archive.

    The archive is opened by `__enter__` and closed by `__exit__`; the
    `add_*`, `distinfo_path` and `data_path` methods use the open archive.
    """

    def __init__(
        self,
        name,
        version,
        build_tag,
        python_tag,
        abi,
        platform,
        compress,
        outfile=None,
        strip_path_prefixes=None,
    ):
        """Store the wheel coordinates and derive the filename fragments.

        Args:
            name: Distribution name.
            version: Version string; it is normalized with `normalize_pep440`.
            build_tag: Optional build tag; omitted from the wheel name when
                empty.
            python_tag: Python tag used in the wheel name and the WHEEL file.
            abi: ABI tag used in the wheel name and the WHEEL file.
            platform: Platform tag used in the wheel name and the WHEEL file.
            compress: Whether archive entries are deflated or merely stored.
            outfile: Path of the archive to write; defaults to the wheel name.
            strip_path_prefixes: Prefixes stripped from package file paths.
        """
        # The archive only exists between __enter__ and __exit__.
        self._whlfile = None

        self._name = name
        self._version = normalize_pep440(version)
        self._build_tag = build_tag
        self._python_tag = python_tag
        self._abi = abi
        self._platform = platform
        self._compress = compress
        self._outfile = outfile
        self._strip_path_prefixes = strip_path_prefixes

        escaped_name = escape_filename_distribution_name(self._name)
        self._wheelname_fragment_distribution_name = escaped_name
        self._distribution_prefix = "-".join((escaped_name, self._version))

    def __enter__(self):
        method = zipfile.ZIP_DEFLATED if self._compress else zipfile.ZIP_STORED
        self._whlfile = _WhlFile(
            self.filename(),
            mode="w",
            distribution_prefix=self._distribution_prefix,
            strip_path_prefixes=self._strip_path_prefixes,
            compression=method,
        )
        return self

    def __exit__(self, type, value, traceback):
        self._whlfile.close()
        self._whlfile = None

    def wheelname(self) -> str:
        """Return the wheel filename built from the name, version and tags."""
        # The build tag is an optional component of the wheel name.
        optional_build = [self._build_tag] if self._build_tag else []
        components = [
            self._wheelname_fragment_distribution_name,
            self._version,
            *optional_build,
            self._python_tag,
            self._abi,
            self._platform,
        ]
        return "-".join(components) + ".whl"

    def filename(self) -> str:
        """Return the path the archive is written to."""
        return self._outfile or self.wheelname()

    def disttags(self):
        """Return the list of tags written to the WHEEL file."""
        tag_parts = (self._python_tag, self._abi, self._platform)
        return ["-".join(tag_parts)]

    def distinfo_path(self, basename):
        """Return the archive path of `basename` inside the .dist-info directory."""
        return self._whlfile.distinfo_path(basename)

    def data_path(self, basename):
        """Return the archive path of `basename` inside the .data directory."""
        return self._whlfile.data_path(basename)

    def add_file(self, package_filename, real_filename):
        """Add given file to the distribution."""
        self._whlfile.add_file(package_filename, real_filename)

    def add_wheelfile(self):
        """Write WHEEL file to the distribution"""
        # TODO(pstradomski): Support non-purelib wheels.
        purelib = "true" if self._platform == "any" else "false"
        header = """\
Wheel-Version: 1.0
Generator: bazel-wheelmaker 1.0
Root-Is-Purelib: {}
""".format(
            purelib
        )
        tag_lines = "".join("Tag: %s\n" % tag for tag in self.disttags())
        self._whlfile.add_string(self.distinfo_path("WHEEL"), header + tag_lines)

    def add_metadata(self, metadata, name, description):
        """Write METADATA file to the distribution."""
        # https://www.python.org/dev/peps/pep-0566/
        # https://packaging.python.org/specifications/core-metadata/
        name_line = "Name: %s" % name
        headers = re.sub("^Name: .*$", name_line, metadata, flags=re.MULTILINE)
        version_block = "Version: %s\n\n" % self._version
        # setuptools seems to insert UNKNOWN as description when none is
        # provided.
        body = description or "UNKNOWN"
        document = headers + version_block + body + "\n"
        self._whlfile.add_string(self.distinfo_path("METADATA"), document)

    def add_recordfile(self):
        """Write RECORD file to the distribution."""
        self._whlfile.add_recordfile()
335
336
def get_files_to_package(input_files):
    """Map each package path to the real path that should be packaged.

    input_files: list of pairs (package_path, real_path)

    When a package path occurs more than once, the last real path wins.
    """
    return {package_path: real_path for package_path, real_path in input_files}
346
347
def resolve_argument_stamp(
    argument: str, volatile_status_stamp: Path, stable_status_stamp: Path
) -> str:
    """Resolve workspace status stamps format strings found in the argument string

    Every non-empty line of the two status files is read as a key followed
    by a space and a value; each `{KEY}` occurrence in `argument` is
    replaced with that value. A line that holds only a key (no space) is
    treated as having an empty value.

    Args:
        argument (str): The raw argument representation for the wheel (may include stamp variables)
        volatile_status_stamp (Path): The path to a volatile workspace status file
        stable_status_stamp (Path): The path to a stable workspace status file

    Returns:
        str: A resolved argument string
    """
    lines = (
        volatile_status_stamp.read_text().splitlines()
        + stable_status_stamp.read_text().splitlines()
    )
    for line in lines:
        if not line:
            continue
        # partition() rather than split(): a key-only line yields an empty
        # value instead of raising ValueError during unpacking.
        key, _, value = line.partition(" ")
        stamp = "{" + key + "}"
        argument = argument.replace(stamp, value)

    return argument
373
374
def parse_args() -> argparse.Namespace:
    """Parse the wheel builder's command-line flags from `sys.argv`.

    Returns:
        The parsed arguments. Flags declared with `action="append"` and no
        default (such as `--input_file`) are `None` when never supplied.
    """
    parser = argparse.ArgumentParser(description="Builds a python wheel")
    metadata_group = parser.add_argument_group("Wheel name, version and platform")
    metadata_group.add_argument(
        "--name", required=True, type=str, help="Name of the distribution"
    )
    metadata_group.add_argument(
        "--version", required=True, type=str, help="Version of the distribution"
    )
    metadata_group.add_argument(
        "--build_tag",
        type=str,
        default="",
        help="Optional build tag for the distribution",
    )
    metadata_group.add_argument(
        "--python_tag",
        type=str,
        default="py3",
        help="Python version, e.g. 'py2' or 'py3'",
    )
    metadata_group.add_argument("--abi", type=str, default="none")
    metadata_group.add_argument(
        "--platform", type=str, default="any", help="Target platform. "
    )

    output_group = parser.add_argument_group("Output file location")
    output_group.add_argument(
        "--out", type=str, default=None, help="Override name of output file"
    )
    output_group.add_argument(
        "--no_compress",
        action="store_true",
        help="Disable compression of the final archive",
    )
    output_group.add_argument(
        "--name_file",
        type=Path,
        help="A file where the canonical name of the wheel will be written",
    )

    output_group.add_argument(
        "--strip_path_prefix",
        type=str,
        action="append",
        default=[],
        help="Path prefix to be stripped from input package files' path. "
        "Can be supplied multiple times. Evaluated in order.",
    )

    wheel_group = parser.add_argument_group("Wheel metadata")
    wheel_group.add_argument(
        "--metadata_file",
        type=Path,
        help="Contents of the METADATA file (before appending contents of "
        "--description_file)",
    )
    wheel_group.add_argument(
        "--description_file", help="Path to the file with package description"
    )
    wheel_group.add_argument(
        "--description_content_type", help="Content type of the package description"
    )
    wheel_group.add_argument(
        "--entry_points_file",
        help="Path to a correctly-formatted entry_points.txt file",
    )

    contents_group = parser.add_argument_group("Wheel contents")
    contents_group.add_argument(
        "--input_file",
        action="append",
        help="'package_path;real_path' pairs listing "
        "files to be included in the wheel. "
        "Can be supplied multiple times.",
    )
    contents_group.add_argument(
        "--input_file_list",
        action="append",
        help="A file that has all the input files defined as a list to avoid "
        "the long command",
    )
    # Adjacent string literals are concatenated verbatim, so each fragment
    # below must end with a space to keep the words apart in --help output.
    contents_group.add_argument(
        "--extra_distinfo_file",
        action="append",
        help="'filename;real_path' pairs listing extra files to include in "
        "dist-info directory. Can be supplied multiple times.",
    )
    contents_group.add_argument(
        "--data_files",
        action="append",
        help="'filename;real_path' pairs listing data files to include in "
        "data directory. Can be supplied multiple times.",
    )

    build_group = parser.add_argument_group("Building requirements")
    build_group.add_argument(
        "--volatile_status_file",
        type=Path,
        help="Pass in the stamp info file for stamping",
    )
    build_group.add_argument(
        "--stable_status_file",
        type=Path,
        help="Pass in the stamp info file for stamping",
    )

    return parser.parse_args(sys.argv[1:])
483
484
485def _parse_file_pairs(content: List[str]) -> List[List[str]]:
486    """
487    Parse ; delimited lists of files into a 2D list.
488    """
489    return [i.split(";", maxsplit=1) for i in content or []]
490
491
def main() -> None:
    """Build a wheel as described by the command-line arguments.

    Collects the input files, resolves stamp placeholders in the name and
    version when both status files are given, then writes the package
    files, WHEEL, METADATA (with `Requires-Dist` entries rewritten), the
    optional entry points, data and extra dist-info files, and finally
    RECORD. The resolved wheel name is written to `--name_file` when that
    flag is supplied.
    """
    arguments = parse_args()

    input_files = _parse_file_pairs(arguments.input_file)
    extra_distinfo_file = _parse_file_pairs(arguments.extra_distinfo_file)
    data_files = _parse_file_pairs(arguments.data_files)

    # --input_file_list has no default, so it is None when never supplied.
    for input_file in arguments.input_file_list or []:
        with open(input_file) as _file:
            input_file_list = _file.read().splitlines()
        # Parse list-file lines exactly like --input_file values (split on
        # the first ";" only).
        input_files.extend(_parse_file_pairs(input_file_list))

    # Sort the files for reproducible order in the archive.
    all_files = sorted(get_files_to_package(input_files).items())

    strip_prefixes = list(arguments.strip_path_prefix)

    # Stamp placeholders are only resolved when both status files are given.
    if arguments.volatile_status_file and arguments.stable_status_file:
        name = resolve_argument_stamp(
            arguments.name,
            arguments.volatile_status_file,
            arguments.stable_status_file,
        )
        version = resolve_argument_stamp(
            arguments.version,
            arguments.volatile_status_file,
            arguments.stable_status_file,
        )
    else:
        name = arguments.name
        version = arguments.version

    with WheelMaker(
        name=name,
        version=version,
        build_tag=arguments.build_tag,
        python_tag=arguments.python_tag,
        abi=arguments.abi,
        platform=arguments.platform,
        outfile=arguments.out,
        strip_path_prefixes=strip_prefixes,
        compress=not arguments.no_compress,
    ) as maker:
        for package_filename, real_filename in all_files:
            maker.add_file(package_filename, real_filename)
        maker.add_wheelfile()

        description = None
        if arguments.description_file:
            with open(
                arguments.description_file, "rt", encoding="utf-8"
            ) as description_file:
                description = description_file.read()

        # NOTE(review): --metadata_file is not declared as required by
        # parse_args() but is read unconditionally here.
        metadata = arguments.metadata_file.read_text(encoding="utf-8")

        # This is not imported at the top of the file due to the reliance
        # on this file in the `whl_library` repository rule which does not
        # provide `packaging` but does import symbols defined here.
        from packaging.requirements import Requirement

        # Search for any `Requires-Dist` entries that refer to other files and
        # expand them.

        def get_new_requirement_line(reqs_text, extra):
            # Re-render one requirement, combining its own marker (if any)
            # with the extra marker text taken from the METADATA line.
            req = Requirement(reqs_text.strip())
            if req.marker:
                if extra:
                    return f"Requires-Dist: {req.name}{req.specifier}; ({req.marker}) and {extra}"
                else:
                    return f"Requires-Dist: {req.name}{req.specifier}; {req.marker}"
            else:
                return f"Requires-Dist: {req.name}{req.specifier}; {extra}".strip(" ;")

        for meta_line in metadata.splitlines():
            if not meta_line.startswith("Requires-Dist: "):
                continue

            if not meta_line[len("Requires-Dist: ") :].startswith("@"):
                # This is a normal requirement.
                package, _, extra = meta_line[len("Requires-Dist: ") :].rpartition(";")
                if not package:
                    # This is when the package requirement does not have markers.
                    continue
                extra = extra.strip()
                metadata = metadata.replace(
                    meta_line, get_new_requirement_line(package, extra)
                )
                continue

            # This is a requirement that refers to a file.
            file, _, extra = meta_line[len("Requires-Dist: @") :].partition(";")
            extra = extra.strip()

            reqs = []
            for reqs_line in Path(file).read_text(encoding="utf-8").splitlines():
                reqs_text = reqs_line.strip()
                if not reqs_text or reqs_text.startswith(("#", "-")):
                    continue

                # Strip any comments
                reqs_text, _, _ = reqs_text.partition("#")

                reqs.append(get_new_requirement_line(reqs_text, extra))

            metadata = metadata.replace(meta_line, "\n".join(reqs))

        maker.add_metadata(
            metadata=metadata,
            name=name,
            description=description,
        )

        if arguments.entry_points_file:
            maker.add_file(
                maker.distinfo_path("entry_points.txt"), arguments.entry_points_file
            )

        # Sort the files for reproducible order in the archive.
        for filename, real_path in sorted(data_files):
            maker.add_file(maker.data_path(filename), real_path)
        for filename, real_path in sorted(extra_distinfo_file):
            maker.add_file(maker.distinfo_path(filename), real_path)

        maker.add_recordfile()

        # Since stamping may otherwise change the target name of the
        # wheel, the canonical name (with stamps resolved) is written
        # to a file so consumers of the wheel can easily determine
        # the correct name. --name_file is optional, so skip the write
        # when it was not requested.
        if arguments.name_file:
            arguments.name_file.write_text(maker.wheelname())
628
629
# Build the wheel only when this file is executed as a script.
if __name__ == "__main__":
    main()
632