• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2018 The Bazel Authors. All rights reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#    http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Runfiles lookup library for Bazel-built Python binaries and tests.
16
17See README.md for usage instructions.
18"""
19import inspect
20import os
21import posixpath
22import sys
23
24if False:
25    # Mypy needs these symbols imported, but since they only exist in python 3.5+,
26    # this import may fail at runtime. Luckily mypy can follow this conditional import.
27    from typing import Callable, Dict, Optional, Tuple, Union
28
29
def CreateManifestBased(manifest_path):
    # type: (str) -> _Runfiles
    """Returns a `_Runfiles` object backed by the manifest at `manifest_path`.

    The manifest-based strategy is constructed first and then wrapped, so any
    error raised while building the strategy propagates to the caller.
    """
    manifest_strategy = _ManifestBased(manifest_path)
    return _Runfiles(manifest_strategy)
33
34
def CreateDirectoryBased(runfiles_dir_path):
    # type: (str) -> _Runfiles
    """Returns a `_Runfiles` object rooted at the directory `runfiles_dir_path`.

    The directory-based strategy is constructed first and then wrapped, so any
    error raised while building the strategy propagates to the caller.
    """
    directory_strategy = _DirectoryBased(runfiles_dir_path)
    return _Runfiles(directory_strategy)
38
39
def Create(env=None):
    # type: (Optional[Dict[str, str]]) -> Optional[_Runfiles]
    """Creates a runfiles lookup object from environment variables.

    Two variables are consulted, in this priority order:
    1. "RUNFILES_MANIFEST_FILE": if set to a non-empty value, a manifest-based
       object is returned (see `CreateManifestBased`).
    2. "RUNFILES_DIR": if set to a non-empty value, a directory-based object
       is returned (see `CreateDirectoryBased`).

    If neither variable has a non-empty value, None is returned.

    Args:
      env: {string: string}; optional; the map of environment variables. If
          None, the environment of the current process (`os.environ`) is used.
    Returns:
      a runfiles object, or None if neither variable is set to a non-empty
      value
    """
    variables = env if env is not None else os.environ

    manifest_file = variables.get("RUNFILES_MANIFEST_FILE")
    if manifest_file:
        # The manifest takes precedence over the directory.
        return CreateManifestBased(manifest_file)

    runfiles_dir = variables.get("RUNFILES_DIR")
    if runfiles_dir:
        return CreateDirectoryBased(runfiles_dir)

    # Neither variable is usable.
    return None
75
76
class _Runfiles(object):
    """Returns the runtime location of runfiles.

    Runfiles are data-dependencies of Bazel-built binaries and tests.

    Path lookups are delegated to a strategy object; this class adds argument
    validation and the translation of apparent repository names to canonical
    ones via the repository mapping.
    """

    def __init__(self, strategy):
        # type: (Union[_ManifestBased, _DirectoryBased]) -> None
        """Initializes the object with the given lookup strategy.

        Args:
          strategy: the object used to resolve runfiles paths; it must provide
            `RlocationChecked(path)` and `EnvVars()`.
        """
        self._strategy = strategy
        self._python_runfiles_root = _FindPythonRunfilesRoot()
        # The repository mapping is itself a runfile named "_repo_mapping".
        self._repo_mapping = _ParseRepoMapping(
            strategy.RlocationChecked("_repo_mapping")
        )

    def Rlocation(self, path, source_repo=None):
        # type: (str, Optional[str]) -> Optional[str]
        """Returns the runtime path of a runfile.

        Runfiles are data-dependencies of Bazel-built binaries and tests.

        The returned path may not be valid. The caller should check the path's
        validity and that the path exists.

        The function may return None. In that case the caller can be sure that the
        rule does not know about this data-dependency.

        Args:
          path: string; runfiles-root-relative path of the runfile
          source_repo: string; optional; the canonical name of the repository
            whose repository mapping should be used to resolve apparent to
            canonical repository names in `path`. If `None` (default), the
            repository mapping of the repository containing the caller of this
            method is used. Explicitly setting this parameter should only be
            necessary for libraries that want to wrap the runfiles library. Use
            `CurrentRepository` to obtain canonical repository names.
        Returns:
          the path to the runfile, which the caller should check for existence, or
          None if the method doesn't know about this runfile. An absolute
          `path` is returned unchanged.
        Raises:
          TypeError: if `path` is not a string
          ValueError: if `path` is None or empty (or any other falsy value),
            is not normalized, or starts with a backslash
        """
        # Emptiness is checked before the type, so every falsy value (None,
        # "", 0, ...) is reported as ValueError.
        if not path:
            raise ValueError("path is None or empty")
        if not isinstance(path, str):
            raise TypeError("path must be a string, got %s" % type(path).__name__)
        if (
            path.startswith("../")
            or "/.." in path
            or path.startswith("./")
            or "/./" in path
            or path.endswith("/.")
            or "//" in path
        ):
            raise ValueError('path is not normalized: "%s"' % path)
        if path[0] == "\\":
            raise ValueError('path is absolute without a drive letter: "%s"' % path)
        if os.path.isabs(path):
            return path

        if source_repo is None and self._repo_mapping:
            # Look up runfiles using the repository mapping of the caller of the
            # current method. If the repo mapping is empty, determining this
            # name is not necessary.
            # frame=2: frame 1 would be this method itself, frame 2 is the
            # code that called Rlocation.
            source_repo = self.CurrentRepository(frame=2)

        # Split off the first path component, which contains the repository
        # name (apparent or canonical).
        target_repo, _, remainder = path.partition("/")
        if not remainder or (source_repo, target_repo) not in self._repo_mapping:
            # One of the following is the case:
            # - not using Bzlmod, so the repository mapping is empty and
            #   apparent and canonical repository names are the same
            # - target_repo is already a canonical repository name and does not
            #   have to be mapped.
            # - path did not contain a slash and referred to a root symlink,
            #   which also should not be mapped.
            return self._strategy.RlocationChecked(path)

        # target_repo is an apparent repository name. Look up the corresponding
        # canonical repository name with respect to the current repository,
        # identified by its canonical name.
        target_canonical = self._repo_mapping[(source_repo, target_repo)]
        return self._strategy.RlocationChecked(target_canonical + "/" + remainder)

    def EnvVars(self):
        # type: () -> Dict[str, str]
        """Returns environment variables for subprocesses.

        The caller should set the returned key-value pairs in the environment of
        subprocesses in case those subprocesses are also Bazel-built binaries that
        need to use runfiles.

        Returns:
          {string: string}; a dict; keys are environment variable names, values are
          the values for these environment variables
        """
        return self._strategy.EnvVars()

    def CurrentRepository(self, frame=1):
        # type: (int) -> str
        """Returns the canonical name of the caller's Bazel repository.

        For example, this function returns '' (the empty string) when called
        from the main repository and a string of the form
        `rules_python~0.13.0` when called from code in the repository
        corresponding to the rules_python Bazel module.

        More information about the difference between canonical repository
        names and the `@repo` part of labels is available at:
        https://bazel.build/build/bzlmod#repository-names

        NOTE: This function inspects the callstack to determine where in the
        runfiles the caller is located to determine which repository it came
        from. This may fail or produce incorrect results depending on who the
        caller is, for example if it is not represented by a Python source
        file. Use the `frame` argument to control the stack lookup.

        Args:
            frame: int; the stack frame to return the repository name for.
            Defaults to 1, the caller of the CurrentRepository function.

        Returns:
            The canonical name of the Bazel repository containing the file
            containing the frame-th caller of this function. This is the first
            component of the caller's path relative to the Python runfiles
            root; if the caller's file lies directly in that root, the file
            name itself is returned.

        Raises:
            ValueError: if the caller cannot be determined or the caller's file
            path is not contained in the Python runfiles tree
        """
        # pylint:disable=protected-access  # for sys._getframe
        # pylint:disable=raise-missing-from  # we're still supporting Python 2
        try:
            caller_path = inspect.getfile(sys._getframe(frame))
        except (TypeError, ValueError):
            raise ValueError("failed to determine caller's file path")
        caller_runfiles_path = os.path.relpath(caller_path, self._python_runfiles_root)
        if caller_runfiles_path == ".." or caller_runfiles_path.startswith(
            ".." + os.path.sep
        ):
            raise ValueError(
                "{} does not lie under the runfiles root {}".format(
                    caller_path, self._python_runfiles_root
                )
            )

        # Take the first path component. partition() is used instead of
        # find() + slicing because find() returns -1 when there is no
        # separator, which would silently drop the last character.
        caller_runfiles_directory = caller_runfiles_path.partition(os.path.sep)[0]
        # With Bzlmod, the runfiles directory of the main repository is always
        # named "_main". Without Bzlmod, the value returned by this function is
        # never used, so we just assume Bzlmod is enabled.
        if caller_runfiles_directory == "_main":
            # The canonical name of the main repository (also known as the
            # workspace) is the empty string.
            return ""
        # For all other repositories, the name of the runfiles directory is the
        # canonical name.
        return caller_runfiles_directory
234
235
def _FindPythonRunfilesRoot():
    # type: () -> str
    """Returns the directory that is the root of the Python runfiles tree.

    The root is derived from the location of this file by stripping as many
    trailing path components as this file's runfiles-relative path has.
    """
    # Walk up our own runfiles path to the root of the runfiles tree from which
    # the current file is being run. This path coincides with what the Bazel
    # Python stub sets up as sys.path[0]. Since that entry can be changed at
    # runtime, we rederive it here.
    own_runfiles_path = "rules_python/python/runfiles/runfiles.py"
    # One dirname() per path component: the number of slashes plus one.
    levels_remaining = own_runfiles_path.count("/") + 1
    location = __file__
    while levels_remaining > 0:
        location = os.path.dirname(location)
        levels_remaining -= 1
    return location
247
248
def _ParseRepoMapping(repo_mapping_path):
    # type: (Optional[str]) -> Dict[Tuple[str, str], str]
    """Parses the repository mapping manifest.

    Each line of the file has the form
    `<current canonical name>,<target apparent name>,<target canonical name>`.
    Parsing stops at the first empty line.

    Args:
      repo_mapping_path: string or None; path of the repository mapping file.
    Returns:
      a dict mapping `(current canonical name, target apparent name)` to the
      target canonical name; empty if `repo_mapping_path` is None or empty or
      the file does not exist
    Raises:
      ValueError: if a line does not consist of exactly three comma-separated
        fields
    """
    # If the repository mapping file can't be found, that is not an error: We
    # might be running without Bzlmod enabled or there may not be any runfiles.
    # In this case, just apply an empty repo mapping.
    if not repo_mapping_path:
        return {}
    try:
        # Decode explicitly as UTF-8 so that the result does not depend on the
        # locale's preferred encoding.
        with open(repo_mapping_path, "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        return {}

    repo_mapping = {}
    for line in content.split("\n"):
        if not line:
            # Empty line following the last line break
            break
        fields = line.split(",")
        if len(fields) != 3:
            raise ValueError('malformed repository mapping line: "%s"' % line)
        current_canonical, target_local, target_canonical = fields
        repo_mapping[(current_canonical, target_local)] = target_canonical

    return repo_mapping
272
273
class _ManifestBased(object):
    """`Runfiles` strategy that parses a runfiles-manifest to look up runfiles."""

    def __init__(self, path):
        # type: (str) -> None
        """Reads and caches the whole manifest at `path`.

        Args:
          path: string; path of the runfiles manifest file
        Raises:
          ValueError: if `path` is None or empty (or any other falsy value)
          TypeError: if `path` is not a string
          IOError: if the manifest cannot be read
        """
        if not path:
            raise ValueError("manifest path is None or empty")
        if not isinstance(path, str):
            raise TypeError(
                "manifest path must be a string, got %s" % type(path).__name__
            )
        self._path = path
        self._runfiles = _ManifestBased._LoadRunfiles(path)

    def RlocationChecked(self, path):
        # type: (str) -> Optional[str]
        """Returns the runtime path of a runfile.

        Args:
          path: string; runfiles-root-relative path of the runfile
        Returns:
          the manifest entry for `path`, or the entry of the longest listed
          directory prefix of `path` joined with the rest of `path`, or None if
          neither `path` nor any of its prefixes is listed
        """
        exact_match = self._runfiles.get(path)
        if exact_match:
            return exact_match
        # If path references a runfile that lies under a directory that
        # itself is a runfile, then only the directory is listed in the
        # manifest. Look up all prefixes of path in the manifest and append
        # the relative path from the prefix to the looked up path.
        prefix_end = len(path)
        while True:
            # Move to the previous "/" so that ever shorter prefixes are tried.
            prefix_end = path.rfind("/", 0, prefix_end - 1)
            if prefix_end == -1:
                return None
            prefix_match = self._runfiles.get(path[0:prefix_end])
            if prefix_match:
                return prefix_match + "/" + path[prefix_end + 1 :]

    @staticmethod
    def _LoadRunfiles(path):
        # type: (str) -> Dict[str, str]
        """Loads the runfiles manifest.

        Each non-blank line is stripped of surrounding whitespace and split at
        the first space into a runfiles path and its runtime path. A line
        without a space maps the path to itself.

        Args:
          path: string; path of the runfiles manifest file
        Returns:
          a dict mapping runfiles paths to runtime paths
        """
        result = {}
        # Decode explicitly as UTF-8 so that the result does not depend on the
        # locale's preferred encoding.
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if line:
                    tokens = line.split(" ", 1)
                    if len(tokens) == 1:
                        result[line] = line
                    else:
                        result[tokens[0]] = tokens[1]
        return result

    def _GetRunfilesDir(self):
        # type: () -> str
        """Derives the runfiles directory from the manifest path.

        Returns:
          the manifest path without its trailing "/MANIFEST" (or "\\MANIFEST")
          component, or without the "_manifest" suffix if it ends with
          ".runfiles_manifest"; the empty string for any other path
        """
        if self._path.endswith("/MANIFEST") or self._path.endswith("\\MANIFEST"):
            # Both separators have the same length, so one slice covers both.
            return self._path[: -len("/MANIFEST")]
        elif self._path.endswith(".runfiles_manifest"):
            return self._path[: -len("_manifest")]
        else:
            return ""

    def EnvVars(self):
        # type: () -> Dict[str, str]
        """Returns the environment variables that describe this manifest.

        Returns:
          {string: string}; the manifest path and the runfiles directory
          derived from it (possibly the empty string)
        """
        directory = self._GetRunfilesDir()
        return {
            "RUNFILES_MANIFEST_FILE": self._path,
            "RUNFILES_DIR": directory,
            # TODO(laszlocsomor): remove JAVA_RUNFILES once the Java launcher can
            # pick up RUNFILES_DIR.
            "JAVA_RUNFILES": directory,
        }
340
341
class _DirectoryBased(object):
    """`Runfiles` strategy that resolves paths relative to a runfiles root."""

    def __init__(self, path):
        # type: (str) -> None
        """Stores `path` as the runfiles root.

        Raises:
          ValueError: if `path` is falsy (None, empty, ...)
          TypeError: if `path` is truthy but not a string
        """
        if path and isinstance(path, str):
            self._runfiles_root = path
        elif not path:
            # Falsy values are rejected before the type is looked at.
            raise ValueError()
        else:
            raise TypeError()

    def RlocationChecked(self, path):
        # type: (str) -> str
        """Returns `path` joined onto the runfiles root with POSIX separators."""
        # Use posixpath instead of os.path, because Bazel only creates a runfiles
        # tree on Unix platforms, so `Create()` will only create a directory-based
        # runfiles strategy on those platforms.
        root = self._runfiles_root
        return posixpath.join(root, path)

    def EnvVars(self):
        # type: () -> Dict[str, str]
        """Returns the environment variables that point at the runfiles root."""
        # TODO(laszlocsomor): remove JAVA_RUNFILES once the Java launcher can
        # pick up RUNFILES_DIR.
        variable_names = ("RUNFILES_DIR", "JAVA_RUNFILES")
        return dict.fromkeys(variable_names, self._runfiles_root)
369