# Copyright 2018 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Runfiles lookup library for Bazel-built Python binaries and tests.

See README.md for usage instructions.
"""
import errno
import inspect
import os
import posixpath
import sys

if False:
    # Mypy needs these symbols imported, but since they only exist in python 3.5+,
    # this import may fail at runtime. Luckily mypy can follow this conditional import.
    from typing import Callable, Dict, Optional, Tuple, Union


def CreateManifestBased(manifest_path):
    # type: (str) -> _Runfiles
    """Returns a `Runfiles` instance that resolves paths via a manifest file.

    Args:
      manifest_path: string; path of the runfiles manifest to load
    Raises:
      ValueError: if `manifest_path` is None or empty
      TypeError: if `manifest_path` is not a string
      IOError: if the manifest cannot be read
    """
    return _Runfiles(_ManifestBased(manifest_path))


def CreateDirectoryBased(runfiles_dir_path):
    # type: (str) -> _Runfiles
    """Returns a `Runfiles` instance that resolves paths under a directory.

    Args:
      runfiles_dir_path: string; path of the runfiles root directory
    Raises:
      ValueError: if `runfiles_dir_path` is None or empty
      TypeError: if `runfiles_dir_path` is not a string
    """
    return _Runfiles(_DirectoryBased(runfiles_dir_path))


def Create(env=None):
    # type: (Optional[Dict[str, str]]) -> Optional[_Runfiles]
    """Returns a new `Runfiles` instance.

    The returned object is either:
    - manifest-based, meaning it looks up runfile paths from a manifest file, or
    - directory-based, meaning it looks up runfile paths under a given directory
      path

    If `env` contains "RUNFILES_MANIFEST_FILE" with non-empty value, this method
    returns a manifest-based implementation. The object eagerly reads and caches
    the whole manifest file upon instantiation; this may be relevant for
    performance consideration.

    Otherwise, if `env` contains "RUNFILES_DIR" with non-empty value (checked in
    this priority order), this method returns a directory-based implementation.

    If neither case applies, this method returns None.

    Args:
      env: {string: string}; optional; the map of environment variables. If None,
        this function uses the environment variable map of this process.
    Raises:
      IOError: if some IO error occurs.
    """
    env_map = os.environ if env is None else env
    manifest = env_map.get("RUNFILES_MANIFEST_FILE")
    if manifest:
        return CreateManifestBased(manifest)

    directory = env_map.get("RUNFILES_DIR")
    if directory:
        return CreateDirectoryBased(directory)

    return None


class _Runfiles(object):
    """Returns the runtime location of runfiles.

    Runfiles are data-dependencies of Bazel-built binaries and tests.
    """

    def __init__(self, strategy):
        # type: (Union[_ManifestBased, _DirectoryBased]) -> None
        """Wraps a lookup strategy and loads the repository mapping, if any.

        Args:
          strategy: the object used to resolve runfiles paths and to compute
            the environment variables for subprocesses
        """
        self._strategy = strategy
        self._python_runfiles_root = _FindPythonRunfilesRoot()
        # The repository mapping is itself a runfile (a root symlink named
        # "_repo_mapping"), so it is located through the strategy.
        self._repo_mapping = _ParseRepoMapping(
            strategy.RlocationChecked("_repo_mapping")
        )

    def Rlocation(self, path, source_repo=None):
        # type: (str, Optional[str]) -> Optional[str]
        """Returns the runtime path of a runfile.

        Runfiles are data-dependencies of Bazel-built binaries and tests.

        The returned path may not be valid. The caller should check the path's
        validity and that the path exists.

        The function may return None. In that case the caller can be sure that the
        rule does not know about this data-dependency.

        Args:
          path: string; runfiles-root-relative path of the runfile
          source_repo: string; optional; the canonical name of the repository
            whose repository mapping should be used to resolve apparent to
            canonical repository names in `path`. If `None` (default), the
            repository mapping of the repository containing the caller of this
            method is used. Explicitly setting this parameter should only be
            necessary for libraries that want to wrap the runfiles library. Use
            `CurrentRepository` to obtain canonical repository names.
        Returns:
          the path to the runfile, which the caller should check for existence, or
          None if the method doesn't know about this runfile. A `path` that is
          already absolute (per `os.path.isabs`) is returned unchanged.
        Raises:
          TypeError: if `path` is not a string
          ValueError: if `path` is None or empty, is not normalized, or is
            absolute without a drive letter (i.e. starts with a backslash)
        """
        if not path:
            raise ValueError()
        if not isinstance(path, str):
            raise TypeError()
        if (
            path.startswith("../")
            or "/.." in path
            or path.startswith("./")
            or "/./" in path
            or path.endswith("/.")
            or "//" in path
        ):
            raise ValueError('path is not normalized: "%s"' % path)
        if path[0] == "\\":
            raise ValueError('path is absolute without a drive letter: "%s"' % path)
        if os.path.isabs(path):
            return path

        if source_repo is None and self._repo_mapping:
            # Look up runfiles using the repository mapping of the caller of the
            # current method. If the repo mapping is empty, determining this
            # name is not necessary.
            # frame=2: frame 1 is this method, frame 2 is whoever called it.
            source_repo = self.CurrentRepository(frame=2)

        # Split off the first path component, which contains the repository
        # name (apparent or canonical).
        target_repo, _, remainder = path.partition("/")
        if not remainder or (source_repo, target_repo) not in self._repo_mapping:
            # One of the following is the case:
            # - not using Bzlmod, so the repository mapping is empty and
            #   apparent and canonical repository names are the same
            # - target_repo is already a canonical repository name and does not
            #   have to be mapped.
            # - path did not contain a slash and referred to a root symlink,
            #   which also should not be mapped.
            return self._strategy.RlocationChecked(path)

        # target_repo is an apparent repository name. Look up the corresponding
        # canonical repository name with respect to the current repository,
        # identified by its canonical name.
        target_canonical = self._repo_mapping[(source_repo, target_repo)]
        return self._strategy.RlocationChecked(target_canonical + "/" + remainder)

    def EnvVars(self):
        # type: () -> Dict[str, str]
        """Returns environment variables for subprocesses.

        The caller should set the returned key-value pairs in the environment of
        subprocesses in case those subprocesses are also Bazel-built binaries that
        need to use runfiles.

        Returns:
          {string: string}; a dict; keys are environment variable names, values are
          the values for these environment variables
        """
        return self._strategy.EnvVars()

    def CurrentRepository(self, frame=1):
        # type: (int) -> str
        """Returns the canonical name of the caller's Bazel repository.

        For example, this function returns '' (the empty string) when called
        from the main repository and a string of the form
        'rules_python~0.13.0' when called from code in the repository
        corresponding to the rules_python Bazel module.

        More information about the difference between canonical repository
        names and the `@repo` part of labels is available at:
        https://bazel.build/build/bzlmod#repository-names

        NOTE: This function inspects the callstack to determine where in the
        runfiles the caller is located to determine which repository it came
        from. This may fail or produce incorrect results depending on who the
        caller is, for example if it is not represented by a Python source
        file. Use the `frame` argument to control the stack lookup.

        Args:
          frame: int; the stack frame to return the repository name for.
            Defaults to 1, the caller of the CurrentRepository function.

        Returns:
          The canonical name of the Bazel repository containing the file
          containing the frame-th caller of this function

        Raises:
          ValueError: if the caller cannot be determined or the caller's file
            path is not contained in the Python runfiles tree
        """
        # pylint:disable=protected-access  # for sys._getframe
        # pylint:disable=raise-missing-from  # we're still supporting Python 2
        try:
            caller_path = inspect.getfile(sys._getframe(frame))
        except (TypeError, ValueError):
            raise ValueError("failed to determine caller's file path")
        caller_runfiles_path = os.path.relpath(caller_path, self._python_runfiles_root)
        if caller_runfiles_path.startswith(".." + os.path.sep):
            raise ValueError(
                "{} does not lie under the runfiles root {}".format(
                    caller_path, self._python_runfiles_root
                )
            )

        # The first component of the runfiles-relative path names the
        # repository directory. partition() is used instead of slicing with
        # find(): find() returns -1 when there is no separator, which would
        # silently chop the last character off the path.
        caller_runfiles_directory, _, _ = caller_runfiles_path.partition(os.path.sep)
        # With Bzlmod, the runfiles directory of the main repository is always
        # named "_main". Without Bzlmod, the value returned by this function is
        # never used, so we just assume Bzlmod is enabled.
        if caller_runfiles_directory == "_main":
            # The canonical name of the main repository (also known as the
            # workspace) is the empty string.
            return ""
        # For all other repositories, the name of the runfiles directory is the
        # canonical name.
        return caller_runfiles_directory


def _FindPythonRunfilesRoot():
    # type: () -> str
    """Finds the root of the Python runfiles tree."""
    root = __file__
    # Walk up our own runfiles path to the root of the runfiles tree from which
    # the current file is being run. This path coincides with what the Bazel
    # Python stub sets up as sys.path[0]. Since that entry can be changed at
    # runtime, we rederive it here.
    for _ in range("rules_python/python/runfiles/runfiles.py".count("/") + 1):
        root = os.path.dirname(root)
    return root


def _ParseRepoMapping(repo_mapping_path):
    # type: (Optional[str]) -> Dict[Tuple[str, str], str]
    """Parses the repository mapping manifest.

    Each non-empty line of the file has the form
    `<current canonical>,<target apparent>,<target canonical>`. Parsing stops
    at the first empty line.

    Args:
      repo_mapping_path: string; optional; path of the repository mapping file
    Returns:
      a dict mapping (current canonical name, target apparent name) to the
      target canonical name; empty if the path is None/empty or the file does
      not exist
    Raises:
      IOError: if the file exists but cannot be read
      ValueError: if a line does not consist of exactly three comma-separated
        fields
    """
    # If the repository mapping file can't be found, that is not an error: We
    # might be running without Bzlmod enabled or there may not be any runfiles.
    # In this case, just apply an empty repo mapping.
    if not repo_mapping_path:
        return {}
    try:
        with open(repo_mapping_path, "r") as f:
            content = f.read()
    except (IOError, OSError) as e:
        # FileNotFoundError only exists on Python 3; checking errno keeps the
        # "missing file is fine" behavior working on Python 2 as well, while
        # still propagating every other I/O failure.
        if e.errno != errno.ENOENT:
            raise
        return {}

    repo_mapping = {}
    for line in content.split("\n"):
        if not line:
            # Empty line following the last line break
            break
        current_canonical, target_local, target_canonical = line.split(",")
        repo_mapping[(current_canonical, target_local)] = target_canonical

    return repo_mapping


class _ManifestBased(object):
    """`Runfiles` strategy that parses a runfiles-manifest to look up runfiles."""

    def __init__(self, path):
        # type: (str) -> None
        """Eagerly loads the manifest at `path`.

        Raises:
          ValueError: if `path` is None or empty
          TypeError: if `path` is not a string
          IOError: if the manifest cannot be read
        """
        if not path:
            raise ValueError()
        if not isinstance(path, str):
            raise TypeError()
        self._path = path
        self._runfiles = _ManifestBased._LoadRunfiles(path)

    def RlocationChecked(self, path):
        # type: (str) -> Optional[str]
        """Returns the runtime path of a runfile.

        Returns None if neither `path` nor any of its directory prefixes is
        listed in the manifest.
        """
        exact_match = self._runfiles.get(path)
        if exact_match:
            return exact_match
        # If path references a runfile that lies under a directory that
        # itself is a runfile, then only the directory is listed in the
        # manifest. Look up all prefixes of path in the manifest and append
        # the relative path from the prefix to the looked up path.
        prefix_end = len(path)
        while True:
            prefix_end = path.rfind("/", 0, prefix_end - 1)
            if prefix_end == -1:
                return None
            prefix_match = self._runfiles.get(path[0:prefix_end])
            if prefix_match:
                return prefix_match + "/" + path[prefix_end + 1 :]

    @staticmethod
    def _LoadRunfiles(path):
        # type: (str) -> Dict[str, str]
        """Loads the runfiles manifest.

        Each non-blank line is split at the first space into a runfiles path
        and its runtime path; a line without a space maps to itself.
        """
        result = {}
        with open(path, "r") as f:
            for line in f:
                line = line.strip()
                if line:
                    tokens = line.split(" ", 1)
                    if len(tokens) == 1:
                        result[line] = line
                    else:
                        result[tokens[0]] = tokens[1]
        return result

    def _GetRunfilesDir(self):
        # type: () -> str
        """Derives the runfiles directory from the manifest path.

        Returns the empty string if the manifest path follows neither the
        `<dir>/MANIFEST` nor the `<dir>.runfiles_manifest` naming scheme.
        """
        if self._path.endswith("/MANIFEST") or self._path.endswith("\\MANIFEST"):
            # Both separators have the same length, so one slice covers both.
            return self._path[: -len("/MANIFEST")]
        elif self._path.endswith(".runfiles_manifest"):
            # "foo.runfiles_manifest" -> "foo.runfiles"
            return self._path[: -len("_manifest")]
        else:
            return ""

    def EnvVars(self):
        # type: () -> Dict[str, str]
        """Returns the runfiles environment variables for subprocesses."""
        directory = self._GetRunfilesDir()
        return {
            "RUNFILES_MANIFEST_FILE": self._path,
            "RUNFILES_DIR": directory,
            # TODO(laszlocsomor): remove JAVA_RUNFILES once the Java launcher can
            # pick up RUNFILES_DIR.
            "JAVA_RUNFILES": directory,
        }


class _DirectoryBased(object):
    """`Runfiles` strategy that appends runfiles paths to the runfiles root."""

    def __init__(self, path):
        # type: (str) -> None
        """Stores the runfiles root directory.

        Raises:
          ValueError: if `path` is None or empty
          TypeError: if `path` is not a string
        """
        if not path:
            raise ValueError()
        if not isinstance(path, str):
            raise TypeError()
        self._runfiles_root = path

    def RlocationChecked(self, path):
        # type: (str) -> str
        """Returns `path` joined onto the runfiles root; existence is not checked."""

        # Use posixpath instead of os.path, because Bazel only creates a runfiles
        # tree on Unix platforms, so `Create()` will only create a directory-based
        # runfiles strategy on those platforms.
        return posixpath.join(self._runfiles_root, path)

    def EnvVars(self):
        # type: () -> Dict[str, str]
        """Returns the runfiles environment variables for subprocesses."""
        return {
            "RUNFILES_DIR": self._runfiles_root,
            # TODO(laszlocsomor): remove JAVA_RUNFILES once the Java launcher can
            # pick up RUNFILES_DIR.
            "JAVA_RUNFILES": self._runfiles_root,
        }