# Copyright 2023 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Utility functions to discover python package types"""
import os
import textwrap
from pathlib import Path  # supported in >= 3.4
from typing import List, Optional, Set

# File suffixes treated as importable modules when deciding whether a
# directory contains Python modules.
_MODULE_SUFFIXES = frozenset(
    {
        ".py",  # Source modules
        ".pyc",  # Compiled bytecode modules
        ".so",  # Unix extension modules
        ".pyd",  # https://docs.python.org/3/faq/windows.html#is-a-pyd-file-the-same-as-a-dll
    }
)


def implicit_namespace_packages(
    directory: str, ignored_dirnames: Optional[List[str]] = None
) -> Set[Path]:
    """Discovers namespace packages implemented using the 'native namespace packages' method.

    AKA 'implicit namespace packages', which has been supported since Python 3.3.
    See: https://packaging.python.org/guides/packaging-namespace-packages/#native-namespace-packages

    A directory is reported when it has no ``__init__.py``, is not ``directory``
    itself, is not an ignored directory (or located below one), and either
    contains module files or is the direct parent of a package (regular or
    namespace) found during the walk.

    Note that the ``__init__.py`` check runs before the ignore check: a
    directory containing ``__init__.py`` is always recorded as a regular
    package, so an ignored directory that is itself a regular package still
    counts as a package child of its parent.

    Args:
        directory: The root directory to recursively find packages in.
        ignored_dirnames: A list of directories to exclude from the search.
            Entries are compared against the paths produced by walking
            ``directory``, so they must be spelled with the same prefix.

    Returns:
        The set of directories found under root to be packages using the native namespace method.
    """
    root = Path(directory)
    # A set gives constant-time membership tests for every visited directory.
    ignored_dirs: Set[Path] = {Path(p) for p in ignored_dirnames or ()}
    namespace_pkg_dirs: Set[Path] = set()
    standard_pkg_dirs: Set[Path] = set()

    # Traverse bottom-up because a directory can be a namespace pkg because
    # its child contains module files.
    for walked_dir, dirnames, filenames in os.walk(root, topdown=False):
        dirpath = Path(walked_dir)

        if "__init__.py" in filenames:
            standard_pkg_dirs.add(dirpath)
            continue

        # Skip ignored directories and everything located below them.
        if ignored_dirs and not ignored_dirs.isdisjoint(
            (dirpath, *dirpath.parents)
        ):
            continue

        # The root of the directory should never be an implicit namespace.
        if dirpath == root:
            continue

        children = [dirpath / name for name in dirnames]
        parent_of_pkg = any(
            child in namespace_pkg_dirs or child in standard_pkg_dirs
            for child in children
        )
        if parent_of_pkg or _includes_python_modules(filenames):
            namespace_pkg_dirs.add(dirpath)

    return namespace_pkg_dirs


def add_pkgutil_style_namespace_pkg_init(dir_path: Path) -> None:
    """Adds 'pkgutil-style namespace packages' init file to the given directory

    See: https://packaging.python.org/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages

    Args:
        dir_path: The directory to create an __init__.py for.

    Raises:
        ValueError: If the directory already contains an __init__.py file
    """
    ns_pkg_init_filepath = os.path.join(dir_path, "__init__.py")

    if os.path.isfile(ns_pkg_init_filepath):
        raise ValueError("%s already contains an __init__.py file." % dir_path)

    # Python reads source files as UTF-8 by default, so write the generated
    # module with that encoding instead of the locale's default.
    with open(ns_pkg_init_filepath, "w", encoding="utf-8") as ns_pkg_init_f:
        # See https://packaging.python.org/guides/packaging-namespace-packages/#pkgutil-style-namespace-packages
        ns_pkg_init_f.write(
            textwrap.dedent(
                """\
                # __path__ manipulation added by bazelbuild/rules_python to support namespace pkgs.
                __path__ = __import__('pkgutil').extend_path(__path__, __name__)
                """
            )
        )


def _includes_python_modules(files: List[str]) -> bool:
    """Returns True if any of the given file names has a module suffix.

    In order to only transform directories that Python actually considers namespace pkgs
    we need to detect if a directory includes Python modules.

    Which files are loadable as modules is extension based, and the particular set of extensions
    varies by platform.

    See:
    1. https://github.com/python/cpython/blob/7d9d25dbedfffce61fc76bc7ccbfa9ae901bf56f/Lib/importlib/machinery.py#L19
    2. PEP 420 -- Implicit Namespace Packages, Specification - https://www.python.org/dev/peps/pep-0420/#specification
    3. dynload_shlib.c and dynload_win.c in python/cpython.

    Args:
        files: File names (not full paths are required) found in one directory.

    Returns:
        True when at least one name ends in ``.py``, ``.pyc``, ``.so`` or ``.pyd``.
    """
    return any(Path(f).suffix in _MODULE_SUFFIXES for f in files)