"""Automatic discovery of Python modules and packages (for inclusion in the
distribution) and other config values.

For the purposes of this module, the following nomenclature is used:

- "src-layout": a directory representing a Python project that contains a "src"
  folder. Everything under the "src" folder is meant to be included in the
  distribution when packaging the project. Example::

    .
    ├── tox.ini
    ├── pyproject.toml
    └── src/
        └── mypkg/
            ├── __init__.py
            ├── mymodule.py
            └── my_data_file.txt

- "flat-layout": a Python project that does not use "src-layout" but instead
  has a directory under the project root for each package::

    .
    ├── tox.ini
    ├── pyproject.toml
    └── mypkg/
        ├── __init__.py
        ├── mymodule.py
        └── my_data_file.txt

- "single-module": a project that contains a single Python script directly under
  the project root (no directory used)::

    .
    ├── tox.ini
    ├── pyproject.toml
    └── mymodule.py

"""

import itertools
import os
from fnmatch import fnmatchcase
from glob import escape as glob_escape
from glob import glob
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Callable, Dict, Iterator, Iterable, List, Optional, Tuple, Union

import _distutils_hack.override  # noqa: F401

from distutils import log
from distutils.util import convert_path

_Path = Union[str, os.PathLike]
_Filter = Callable[[str], bool]
StrIter = Iterator[str]

chain_iter = itertools.chain.from_iterable

if TYPE_CHECKING:
    from setuptools import Distribution  # noqa


def _valid_name(path: _Path) -> bool:
    """Return ``True`` if the last component of ``path`` is a valid identifier."""
    # Ignore invalid names that cannot be imported directly
    return os.path.basename(path).isidentifier()


class _Finder:
    """Base class that exposes functionality for module/package finders"""

    # Patterns that are excluded no matter what the caller passes.
    ALWAYS_EXCLUDE: Tuple[str, ...] = ()
    # Patterns used only when the caller gives no (or an empty) ``exclude``.
    DEFAULT_EXCLUDE: Tuple[str, ...] = ()

    @classmethod
    def find(
        cls,
        where: _Path = '.',
        exclude: Iterable[str] = (),
        include: Iterable[str] = ('*',)
    ) -> List[str]:
        """Return a list of all Python items (packages or modules, depending on
        the finder implementation) found within directory 'where'.

        'where' is the root directory which will be searched.
        It should be supplied as a "cross-platform" (i.e. URL-style) path;
        it will be converted to the appropriate local path syntax.

        'exclude' is a sequence of names to exclude; '*' can be used
        as a wildcard in the names.
        When finding packages, 'foo.*' will exclude all subpackages of 'foo'
        (but not 'foo' itself).

        'include' is a sequence of names to include.
        If it's specified, only the named items will be included.
        If it's not specified, all found items will be included.
        'include' can contain shell style wildcard patterns just like
        'exclude'.
        """

        exclude = exclude or cls.DEFAULT_EXCLUDE
        return list(
            cls._find_iter(
                convert_path(str(where)),
                cls._build_filter(*cls.ALWAYS_EXCLUDE, *exclude),
                cls._build_filter(*include),
            )
        )

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """Yield the names found in 'where'; must be provided by subclasses."""
        raise NotImplementedError

    @staticmethod
    def _build_filter(*patterns: str) -> _Filter:
        """
        Given a list of patterns, return a callable that will be true only if
        the input matches at least one of the patterns.
        """
        return lambda name: any(fnmatchcase(name, pat) for pat in patterns)


class PackageFinder(_Finder):
    """
    Generate a list of all Python packages found within a directory
    """

    ALWAYS_EXCLUDE = ("ez_setup", "*__pycache__")

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """
        All the packages found in 'where' that pass the 'include' filter, but
        not the 'exclude' filter.
        """
        for root, dirs, _files in os.walk(str(where), followlinks=True):
            # Copy dirs to iterate over it, then empty dirs.
            # ``os.walk`` only descends into what is left in ``dirs``, so
            # directories are re-added below one by one when they qualify.
            all_dirs = dirs[:]
            dirs[:] = []

            for dir_name in all_dirs:
                full_path = os.path.join(root, dir_name)
                rel_path = os.path.relpath(full_path, where)
                package = rel_path.replace(os.path.sep, '.')

                # Skip directory trees that are not valid packages
                if '.' in dir_name or not cls._looks_like_package(full_path, package):
                    continue

                # Should this package be included?
                if include(package) and not exclude(package):
                    yield package

                # Keep searching subdirectories, as there may be more packages
                # down there, even if the parent was excluded.
                dirs.append(dir_name)

    @staticmethod
    def _looks_like_package(path: _Path, _package_name: str) -> bool:
        """Does a directory look like a package?"""
        return os.path.isfile(os.path.join(path, '__init__.py'))


class PEP420PackageFinder(PackageFinder):
    """Package finder that does not require ``__init__.py`` (every directory
    visited is accepted as a package).
    """

    @staticmethod
    def _looks_like_package(_path: _Path, _package_name: str) -> bool:
        return True


class ModuleFinder(_Finder):
    """Find isolated Python modules.
    This finder will **not** recurse subdirectories.
    """

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """Yield the names of the ``*.py`` files directly inside 'where' that
        pass the 'include' filter, but not the 'exclude' filter.
        """
        # Escape 'where' so that glob metacharacters in the directory name
        # (e.g. ``[`` or ``*``) are matched literally instead of as a pattern.
        pattern = os.path.join(glob_escape(os.fspath(where)), "*.py")
        for file in glob(pattern):
            module, _ext = os.path.splitext(os.path.basename(file))

            if not cls._looks_like_module(module):
                continue

            if include(module) and not exclude(module):
                yield module

    _looks_like_module = staticmethod(_valid_name)


# We have to be extra careful in the case of flat layout to not include files
# and directories not meant for distribution (e.g. tool-related)


class FlatLayoutPackageFinder(PEP420PackageFinder):
    """Package finder for the "flat-layout": skips a list of reserved directory
    names by default and only accepts importable (or ``-stubs``) package names.
    """

    _EXCLUDE = (
        "ci",
        "bin",
        "doc",
        "docs",
        "documentation",
        "manpages",
        "news",
        "changelog",
        "test",
        "tests",
        "unit_test",
        "unit_tests",
        "example",
        "examples",
        "scripts",
        "tools",
        "util",
        "utils",
        "python",
        "build",
        "dist",
        "venv",
        "env",
        "requirements",
        # ---- Task runners / Build tools ----
        "tasks",  # invoke
        "fabfile",  # fabric
        "site_scons",  # SCons
        # ---- Other tools ----
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        # ---- Hidden directories/Private packages ----
        "[._]*",
    )

    # Each reserved name is excluded together with all of its subpackages.
    DEFAULT_EXCLUDE = tuple(chain_iter((p, f"{p}.*") for p in _EXCLUDE))
    """Reserved package names"""

    @staticmethod
    def _looks_like_package(_path: _Path, package_name: str) -> bool:
        """Accept dotted names whose parts are identifiers; the root part may
        alternatively end with ``-stubs``.
        """
        names = package_name.split('.')
        # Consider PEP 561
        root_pkg_is_valid = names[0].isidentifier() or names[0].endswith("-stubs")
        return root_pkg_is_valid and all(name.isidentifier() for name in names[1:])


class FlatLayoutModuleFinder(ModuleFinder):
    """Module finder for the "flat-layout": skips a list of reserved top-level
    module names by default.
    """

    DEFAULT_EXCLUDE = (
        "setup",
        "conftest",
        "test",
        "tests",
        "example",
        "examples",
        "build",
        # ---- Task runners ----
        "toxfile",
        "noxfile",
        "pavement",
        "dodo",
        "tasks",
        "fabfile",
        # ---- Other tools ----
        "[Ss][Cc]onstruct",  # SCons
        "conanfile",  # Conan: C/C++ build tool
        "manage",  # Django
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        # ---- Hidden files/Private modules ----
        "[._]*",
    )
    """Reserved top-level module names"""


def _find_packages_within(root_pkg: str, pkg_dir: _Path) -> List[str]:
    """Return ``root_pkg`` followed by every package found under ``pkg_dir``,
    each one prefixed with ``root_pkg.``.
    """
    nested = PEP420PackageFinder.find(pkg_dir)
    return [root_pkg] + [".".join((root_pkg, n)) for n in nested]


class ConfigDiscovery:
    """Fill-in metadata and options that can be automatically derived
    (from other metadata/options, the file system or conventions)
    """

    def __init__(self, distribution: "Distribution"):
        self.dist = distribution
        self._called = False
        self._disabled = False
        self._skip_ext_modules = False

    def _disable(self):
        """Internal API to disable automatic discovery"""
        self._disabled = True

    def _ignore_ext_modules(self):
        """Internal API to disregard ext_modules.

        Normally auto-discovery would not be triggered if ``ext_modules`` are set
        (this is done for backward compatibility with existing packages relying on
        ``setup.py`` or ``setup.cfg``). However, ``setuptools`` can call this function
        to ignore given ``ext_modules`` and proceed with the auto-discovery if
        ``packages`` and ``py_modules`` are not given (e.g. when using pyproject.toml
        metadata).
        """
        self._skip_ext_modules = True

    @property
    def _root_dir(self) -> _Path:
        # The best is to wait until `src_root` is set in dist, before using _root_dir.
        return self.dist.src_root or os.curdir

    @property
    def _package_dir(self) -> Dict[str, str]:
        if self.dist.package_dir is None:
            return {}
        return self.dist.package_dir

    def __call__(self, force=False, name=True, ignore_ext_modules=False):
        """Automatically discover missing configuration fields
        and modifies the given ``distribution`` object in-place.

        Note that by default this will only have an effect the first time the
        ``ConfigDiscovery`` object is called.

        To repeatedly invoke automatic discovery (e.g. when the project
        directory changes), please use ``force=True`` (or create a new
        ``ConfigDiscovery`` instance).
        """
        if force is False and (self._called or self._disabled):
            # Avoid overhead of multiple calls
            return

        self._analyse_package_layout(ignore_ext_modules)
        if name:
            self.analyse_name()  # depends on ``packages`` and ``py_modules``

        self._called = True

    def _explicitly_specified(self, ignore_ext_modules: bool) -> bool:
        """``True`` if the user has specified some form of package/module listing"""
        ignore_ext_modules = ignore_ext_modules or self._skip_ext_modules
        ext_modules = not (self.dist.ext_modules is None or ignore_ext_modules)
        return (
            self.dist.packages is not None
            or self.dist.py_modules is not None
            or ext_modules
            # Coerced so the method honours its ``bool`` annotation instead of
            # leaking the configuration object itself.
            or bool(getattr(self.dist, "configuration", None))
            # ^ Some projects use numpy.distutils.misc_util.Configuration
        )

    def _analyse_package_layout(self, ignore_ext_modules: bool) -> bool:
        """Run the layout analysers in order and stop at the first that succeeds.

        Returns ``True`` right away (without discovering anything) when the
        user already gave an explicit package/module listing.
        """
        if self._explicitly_specified(ignore_ext_modules):
            # For backward compatibility, just try to find modules/packages
            # when nothing is given
            return True

        log.debug(
            "No `packages` or `py_modules` configuration, performing "
            "automatic discovery."
        )

        return (
            self._analyse_explicit_layout()
            or self._analyse_src_layout()
            # flat-layout is the trickiest for discovery so it should be last
            or self._analyse_flat_layout()
        )

    def _analyse_explicit_layout(self) -> bool:
        """The user can explicitly give a package layout via ``package_dir``"""
        package_dir = self._package_dir.copy()  # don't modify directly
        package_dir.pop("", None)  # This falls under the "src-layout" umbrella
        root_dir = self._root_dir

        if not package_dir:
            return False

        log.debug(f"`explicit-layout` detected -- analysing {package_dir}")
        pkgs = chain_iter(
            _find_packages_within(pkg, os.path.join(root_dir, parent_dir))
            for pkg, parent_dir in package_dir.items()
        )
        self.dist.packages = list(pkgs)
        log.debug(f"discovered packages -- {self.dist.packages}")
        return True

    def _analyse_src_layout(self) -> bool:
        """Try to find all packages or modules under the ``src`` directory
        (or anything pointed by ``package_dir[""]``).

        The "src-layout" is relatively safe for automatic discovery.
        We assume that everything within is meant to be included in the
        distribution.

        If ``package_dir[""]`` is not given, but the ``src`` directory exists,
        this function will set ``package_dir[""] = "src"``.
        """
        package_dir = self._package_dir
        src_dir = os.path.join(self._root_dir, package_dir.get("", "src"))
        if not os.path.isdir(src_dir):
            return False

        log.debug(f"`src-layout` detected -- analysing {src_dir}")
        package_dir.setdefault("", os.path.basename(src_dir))
        self.dist.package_dir = package_dir  # persist eventual modifications
        self.dist.packages = PEP420PackageFinder.find(src_dir)
        self.dist.py_modules = ModuleFinder.find(src_dir)
        log.debug(f"discovered packages -- {self.dist.packages}")
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        return True

    def _analyse_flat_layout(self) -> bool:
        """Try to find all packages and modules under the project root.

        Since the ``flat-layout`` is more dangerous in terms of accidentally including
        extra files/directories, this function is more conservative and will raise an
        error if multiple packages or modules are found.

        This assumes that multi-package dists are uncommon and refuses to support that
        use case in order to be able to prevent unintended errors.
        """
        log.debug(f"`flat-layout` detected -- analysing {self._root_dir}")
        return self._analyse_flat_packages() or self._analyse_flat_modules()

    def _analyse_flat_packages(self) -> bool:
        """Discover packages under the project root; ``True`` if any top-level
        (non-stub) package was found.
        """
        self.dist.packages = FlatLayoutPackageFinder.find(self._root_dir)
        # Stubs and nested packages do not count towards the "multiple
        # top-level packages" check.
        top_level = remove_nested_packages(remove_stubs(self.dist.packages))
        log.debug(f"discovered packages -- {self.dist.packages}")
        self._ensure_no_accidental_inclusion(top_level, "packages")
        return bool(top_level)

    def _analyse_flat_modules(self) -> bool:
        """Discover modules under the project root; ``True`` if any was found."""
        self.dist.py_modules = FlatLayoutModuleFinder.find(self._root_dir)
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        self._ensure_no_accidental_inclusion(self.dist.py_modules, "modules")
        return bool(self.dist.py_modules)

    def _ensure_no_accidental_inclusion(self, detected: List[str], kind: str):
        """Raise ``PackageDiscoveryError`` if more than one item was detected."""
        if len(detected) > 1:
            from inspect import cleandoc
            from setuptools.errors import PackageDiscoveryError

            msg = f"""Multiple top-level {kind} discovered in a flat-layout: {detected}.

            To avoid accidental inclusion of unwanted files or directories,
            setuptools will not proceed with this build.

            If you are trying to create a single distribution with multiple {kind}
            on purpose, you should not rely on automatic discovery.
            Instead, consider the following options:

            1. set up custom discovery (`find` directive with `include` or `exclude`)
            2. use a `src-layout`
            3. explicitly set `py_modules` or `packages` with a list of names

            To find more information, look for "package discovery" on setuptools docs.
            """
            raise PackageDiscoveryError(cleandoc(msg))

    def analyse_name(self):
        """The packages/modules are the essential contribution of the author.
        Therefore the name of the distribution can be derived from them.
        """
        if self.dist.metadata.name or self.dist.name:
            # get_name() is not reliable (can return "UNKNOWN")
            return None

        log.debug("No `name` configuration, performing automatic discovery")

        name = (
            self._find_name_single_package_or_module()
            or self._find_name_from_packages()
        )
        if name:
            self.dist.metadata.name = name
            self.dist.name = name

    def _find_name_single_package_or_module(self) -> Optional[str]:
        """Exactly one module or package"""
        for field in ('packages', 'py_modules'):
            items = getattr(self.dist, field, None) or []
            if len(items) == 1:
                log.debug(f"Single module/package detected, name: {items[0]}")
                return items[0]

        return None

    def _find_name_from_packages(self) -> Optional[str]:
        """Try to find the root package that is not a PEP 420 namespace"""
        if not self.dist.packages:
            return None

        packages = remove_stubs(sorted(self.dist.packages, key=len))
        package_dir = self.dist.package_dir or {}

        parent_pkg = find_parent_package(packages, package_dir, self._root_dir)
        if parent_pkg:
            log.debug(f"Common parent package detected, name: {parent_pkg}")
            return parent_pkg

        log.warn("No parent package detected, impossible to derive `name`")
        return None


def remove_nested_packages(packages: List[str]) -> List[str]:
    """Remove nested packages from a list of packages.

    >>> remove_nested_packages(["a", "a.b1", "a.b2", "a.b1.c1"])
    ['a']
    >>> remove_nested_packages(["a", "b", "c.d", "c.d.e.f", "g.h", "a.a1"])
    ['a', 'b', 'c.d', 'g.h']
    """
    pkgs = sorted(packages, key=len)
    top_level = pkgs[:]
    size = len(pkgs)
    # Walk from the longest name backwards: only positions after the current
    # one have been popped so far, hence ``size - i - 1`` is still the right
    # index into ``top_level``.
    for i, name in enumerate(reversed(pkgs)):
        if any(name.startswith(f"{other}.") for other in top_level):
            top_level.pop(size - i - 1)

    return top_level


def remove_stubs(packages: List[str]) -> List[str]:
    """Remove type stubs (:pep:`561`) from a list of packages.

    >>> remove_stubs(["a", "a.b", "a-stubs", "a-stubs.b.c", "b", "c-stubs"])
    ['a', 'a.b', 'b']
    """
    return [pkg for pkg in packages if not pkg.split(".")[0].endswith("-stubs")]


def find_parent_package(
    packages: List[str], package_dir: Dict[str, str], root_dir: _Path
) -> Optional[str]:
    """Find the parent package that is not a namespace."""
    packages = sorted(packages, key=len)
    common_ancestors = []
    for i, name in enumerate(packages):
        if not all(n.startswith(f"{name}.") for n in packages[i + 1:]):
            # Since packages are sorted by length, this condition is able
            # to find a list of all common ancestors.
            # When there is divergence (e.g. multiple root packages)
            # the list will be empty
            break
        common_ancestors.append(name)

    # The first common ancestor that ships an ``__init__.py`` wins.
    for name in common_ancestors:
        pkg_path = find_package_path(name, package_dir, root_dir)
        init = os.path.join(pkg_path, "__init__.py")
        if os.path.isfile(init):
            return name

    return None


def find_package_path(name: str, package_dir: Dict[str, str], root_dir: _Path) -> str:
    """Given a package name, return the path where it should be found on
    disk, considering the ``package_dir`` option.

    >>> path = find_package_path("my.pkg", {"": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/my/pkg'

    >>> path = find_package_path("my.pkg", {"my": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/pkg'

    >>> path = find_package_path("my.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested'

    >>> path = find_package_path("other.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './other/pkg'
    """
    parts = name.split(".")
    for i in range(len(parts), 0, -1):
        # Look backwards, the most specific package_dir first
        partial_name = ".".join(parts[:i])
        if partial_name in package_dir:
            parent = package_dir[partial_name]
            return os.path.join(root_dir, parent, *parts[i:])

    # No specific entry matched: fall back to the root (``""``) entry, if any.
    parent = package_dir.get("") or ""
    return os.path.join(root_dir, *parent.split("/"), *parts)


def construct_package_dir(packages: List[str], package_path: _Path) -> Dict[str, str]:
    """Map each non-nested package in ``packages`` to a "/"-separated path made
    of the parts of ``package_path`` followed by the parts of the package name.
    """
    parent_pkgs = remove_nested_packages(packages)
    prefix = Path(package_path).parts
    return {pkg: "/".join([*prefix, *pkg.split(".")]) for pkg in parent_pkgs}