• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2023 The Bazel Authors. All rights reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import argparse
16import json
17import pathlib
18import re
19import sys
20import zipfile
21
22
23# Generator is the modules_mapping.json file generator.
class Generator:
    """Generate the modules_mapping.json file from a set of wheels.

    For every ``.py``/``.so`` file found in the given wheel archives, the
    importable module (and, for ``__init__.py`` files, the containing
    package) is recorded in ``mapping`` together with the name of the wheel
    that provides it. ``run`` serialises that mapping as JSON.
    """

    stderr = None
    output_file = None
    excluded_patterns = None
    # Assigned per instance in __init__. A class-level dict here would be
    # shared by (and accumulate entries across) every Generator instance.
    mapping = None

    def __init__(self, stderr, output_file, excluded_patterns):
        """Store the output destinations and compile the exclusion regexes.

        Args:
            stderr: file-like object that ``run`` prints errors to.
            output_file: path the JSON mapping is written to by ``run``.
            excluded_patterns: iterable of regular expression strings;
                modules matching any of them are left out of the mapping.
        """
        self.stderr = stderr
        self.output_file = output_file
        self.excluded_patterns = [re.compile(pattern) for pattern in excluded_patterns]
        self.mapping = {}

    # dig_wheel analyses the wheel .whl file determining the modules it provides
    # by looking at the directory structure.
    def dig_wheel(self, whl):
        """Index every relevant entry of the wheel archive at ``whl``."""
        with zipfile.ZipFile(whl, "r") as zip_file:
            for path in zip_file.namelist():
                # Entries in metadata directories are skipped unless they sit
                # in a purelib/platlib sub-directory.
                if is_metadata(path) and not data_has_purelib_or_platlib(path):
                    continue
                self.module_for_path(path, whl)

    def module_for_path(self, path, whl):
        """Record the module(s) provided by ``path`` inside the wheel ``whl``.

        Only ``.py`` and ``.so`` files are indexed; any other path is ignored.
        Modules matching an exclusion pattern are not recorded.
        """
        ext = pathlib.Path(path).suffix
        if ext not in (".py", ".so"):
            return

        # Drop the "<name>.data/purelib/" (or platlib) prefix. The check is
        # done on whole path components, case-insensitively like the filter
        # in dig_wheel, so that a regular package whose name merely contains
        # "purelib"/"platlib" keeps its full path.
        parts = path.split("/")
        if (
            len(parts) > 2
            and is_metadata(path)
            and parts[1].lower() in ("purelib", "platlib")
        ):
            root = "/".join(parts[2:])
        else:
            root = path

        wheel_name = get_wheel_name(whl)

        if root.endswith("/__init__.py"):
            # Note the '/' here means that the __init__.py is not in the
            # root of the wheel, therefore we can index the directory
            # where this file is as an importable package.
            package = root[: -len("/__init__.py")].replace("/", ".")
            if not self.is_excluded(package):
                self.mapping[package] = wheel_name

        # Always index the module file.
        if ext == ".so":
            # Also remove extra metadata that is embedded as part of
            # the file name as an extra extension.
            ext = "".join(pathlib.Path(root).suffixes)
        module = root[: -len(ext)].replace("/", ".")
        if not self.is_excluded(module):
            self.mapping[module] = wheel_name

    def is_excluded(self, module):
        """Return True if ``module`` matches any of the exclusion patterns."""
        return any(pattern.search(module) for pattern in self.excluded_patterns)

    # run is the entrypoint for the generator.
    def run(self, wheels):
        """Index all ``wheels`` and write the mapping to ``output_file``.

        Returns:
            0 on success, or 1 if an AssertionError was raised while digging
            a wheel (the error is printed to ``stderr`` and nothing is
            written). Other exception types are not handled here.
        """
        for whl in wheels:
            try:
                self.dig_wheel(whl)
            except AssertionError as error:
                print(error, file=self.stderr)
                return 1
        mapping_json = json.dumps(self.mapping)
        with open(self.output_file, "w") as f:
            f.write(mapping_json)
        return 0
93
94
def get_wheel_name(path):
    """Return the distribution name encoded in a wheel file name.

    The name is everything before the first "-" of the file name, e.g.
    "foo-1.0-py3-none-any.whl" yields "foo".

    Args:
        path: path (or bare file name) of the wheel.

    Raises:
        RuntimeError: if the file does not have a ".whl" suffix or its name
            contains no "-" separator, i.e. it cannot follow the wheel file
            name convention.
    """
    pp = pathlib.PurePath(path)
    # Without a "-" there is no distribution/version boundary to split on;
    # slicing at str.find()'s -1 would silently drop the last character.
    distribution, dash, _ = pp.name.partition("-")
    if pp.suffix != ".whl" or not dash:
        raise RuntimeError(
            "{} is not a valid wheel file name: the wheel doesn't follow ".format(
                pp.name
            )
            + "https://www.python.org/dev/peps/pep-0427/#file-name-convention"
        )
    return distribution
105
106
107# is_metadata checks if the path is in a metadata directory.
108# Ref: https://www.python.org/dev/peps/pep-0427/#file-contents.
def is_metadata(path):
    """Tell whether the first component of ``path`` is a metadata directory.

    The first "/"-separated component is compared case-insensitively against
    the ".dist-info" and ".data" suffixes.
    """
    first_component, _, _ = path.partition("/")
    return first_component.lower().endswith((".dist-info", ".data"))
112
113
114# The .data is allowed to contain a full purelib or platlib directory
115# These get unpacked into site-packages, so require indexing too.
116# This is the same if "Root-Is-Purelib: true" is set and the files are at the root.
117# Ref: https://peps.python.org/pep-0427/#what-s-the-deal-with-purelib-vs-platlib
def data_has_purelib_or_platlib(path):
    """Tell whether ``path`` lies in a purelib/platlib dir of a metadata dir.

    Returns True only when the second "/"-separated component of ``path`` is
    "purelib" or "platlib" (case-insensitive) and ``is_metadata(path)`` holds.
    A path with a single component (e.g. a file at the root of the archive)
    has no such sub-directory and yields False.
    """
    parts = path.split("/")
    # Guard against single-component paths: indexing parts[1] unconditionally
    # would raise IndexError for e.g. a root-level file named "model.data".
    if len(parts) < 2 or parts[1].lower() not in ("purelib", "platlib"):
        return False
    return is_metadata(path)
121
122
if __name__ == "__main__":
    # Command-line entry point: parse the flags, run the generator over the
    # given wheels and use its return value as the process exit status.
    parser = argparse.ArgumentParser(
        prog="generator",
        description="Generates the modules mapping used by the Gazelle manifest.",
    )
    parser.add_argument("--output_file", type=str)
    parser.add_argument("--exclude_patterns", nargs="+", default=[])
    parser.add_argument("--wheels", nargs="+", default=[])
    args = parser.parse_args()
    generator = Generator(sys.stderr, args.output_file, args.exclude_patterns)
    # sys.exit rather than the builtin exit(): the latter is a helper added
    # by the site module for interactive sessions and may be unavailable.
    sys.exit(generator.run(args.wheels))
134