"""Automatic discovery of Python modules and packages (for inclusion in the
distribution) and other config values.

For the purposes of this module, the following nomenclature is used:

- "src-layout": a directory representing a Python project that contains a "src"
  folder. Everything under the "src" folder is meant to be included in the
  distribution when packaging the project. Example::

    .
    ├── tox.ini
    ├── pyproject.toml
    └── src/
        └── mypkg/
            ├── __init__.py
            ├── mymodule.py
            └── my_data_file.txt

- "flat-layout": a Python project that does not use "src-layout" but instead
  has a directory under the project root for each package::

    .
    ├── tox.ini
    ├── pyproject.toml
    └── mypkg/
        ├── __init__.py
        ├── mymodule.py
        └── my_data_file.txt

- "single-module": a project that contains a single Python script directly
  under the project root (no directory used)::

    .
    ├── tox.ini
    ├── pyproject.toml
    └── mymodule.py

"""

import itertools
import os
from fnmatch import fnmatchcase
from glob import escape as _glob_escape
from glob import glob
from pathlib import Path
from typing import (
    TYPE_CHECKING,
    Callable,
    Dict,
    Iterable,
    Iterator,
    List,
    Mapping,
    Optional,
    Tuple,
    Union
)

import _distutils_hack.override  # noqa: F401

from distutils import log
from distutils.util import convert_path

_Path = Union[str, os.PathLike]
_Filter = Callable[[str], bool]
StrIter = Iterator[str]

chain_iter = itertools.chain.from_iterable

if TYPE_CHECKING:
    from setuptools import Distribution  # noqa


def _valid_name(path: _Path) -> bool:
    """Return ``True`` if the last component of ``path`` is a valid identifier."""
    # Ignore invalid names that cannot be imported directly
    return os.path.basename(path).isidentifier()


class _Finder:
    """Base class that exposes functionality for module/package finders"""

    # Patterns that are excluded no matter what the caller passes as ``exclude``.
    ALWAYS_EXCLUDE: Tuple[str, ...] = ()
    # Patterns used when the caller passes no (or an empty) ``exclude``.
    DEFAULT_EXCLUDE: Tuple[str, ...] = ()

    @classmethod
    def find(
        cls,
        where: _Path = '.',
        exclude: Iterable[str] = (),
        include: Iterable[str] = ('*',)
    ) -> List[str]:
        """Return a list of all Python items (packages or modules, depending on
        the finder implementation) found within directory 'where'.

        'where' is the root directory which will be searched.
        It should be supplied as a "cross-platform" (i.e. URL-style) path;
        it will be converted to the appropriate local path syntax.

        'exclude' is a sequence of names to exclude; '*' can be used
        as a wildcard in the names.
        When finding packages, 'foo.*' will exclude all subpackages of 'foo'
        (but not 'foo' itself).

        'include' is a sequence of names to include.
        If it's specified, only the named items will be included.
        If it's not specified, all found items will be included.
        'include' can contain shell style wildcard patterns just like
        'exclude'.
        """

        # An empty ``exclude`` falls back to the finder-specific defaults.
        exclude = exclude or cls.DEFAULT_EXCLUDE
        return list(
            cls._find_iter(
                convert_path(str(where)),
                cls._build_filter(*cls.ALWAYS_EXCLUDE, *exclude),
                cls._build_filter(*include),
            )
        )

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """Yield the names found in ``where``; implemented by subclasses."""
        raise NotImplementedError

    @staticmethod
    def _build_filter(*patterns: str) -> _Filter:
        """
        Given a list of patterns, return a callable that will be true only if
        the input matches at least one of the patterns.
        """
        return lambda name: any(fnmatchcase(name, pat) for pat in patterns)


class PackageFinder(_Finder):
    """
    Generate a list of all Python packages found within a directory
    """

    ALWAYS_EXCLUDE = ("ez_setup", "*__pycache__")

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """
        All the packages found in 'where' that pass the 'include' filter, but
        not the 'exclude' filter.
        """
        for root, dirs, _files in os.walk(str(where), followlinks=True):
            # Copy dirs to iterate over it, then empty dirs.
            # ``os.walk`` only descends into what is left in ``dirs``, so the
            # directories to visit are re-added selectively below.
            all_dirs = dirs[:]
            dirs[:] = []

            for dir_name in all_dirs:
                full_path = os.path.join(root, dir_name)
                rel_path = os.path.relpath(full_path, where)
                package = rel_path.replace(os.path.sep, '.')

                # Skip directory trees that are not valid packages
                if '.' in dir_name or not cls._looks_like_package(
                    full_path, package
                ):
                    continue

                # Should this package be included?
                if include(package) and not exclude(package):
                    yield package

                # Keep searching subdirectories, as there may be more packages
                # down there, even if the parent was excluded.
                dirs.append(dir_name)

    @staticmethod
    def _looks_like_package(path: _Path, _package_name: str) -> bool:
        """Does a directory look like a package?"""
        return os.path.isfile(os.path.join(path, '__init__.py'))


class PEP420PackageFinder(PackageFinder):
    """Package finder that also accepts directories without ``__init__.py``."""

    @staticmethod
    def _looks_like_package(_path: _Path, _package_name: str) -> bool:
        """Accept every directory as a package."""
        return True


class ModuleFinder(_Finder):
    """Find isolated Python modules.
    This finder will **not** recurse subdirectories.
    """

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """Yield the names of the ``*.py`` files located directly in ``where``
        that pass the ``include`` filter, but not the ``exclude`` filter.
        """
        # ``where`` is a directory, not a pattern: escape glob metacharacters
        # (e.g. a project living in ``proj[1]``) so they are matched literally.
        pattern = os.path.join(_glob_escape(os.fspath(where)), "*.py")
        for file in glob(pattern):
            module, _ext = os.path.splitext(os.path.basename(file))

            if not cls._looks_like_module(module):
                continue

            if include(module) and not exclude(module):
                yield module

    _looks_like_module = staticmethod(_valid_name)


# We have to be extra careful in the case of flat layout to not include files
# and directories not meant for distribution (e.g. tool-related)


class FlatLayoutPackageFinder(PEP420PackageFinder):
    """PEP 420 package finder with default exclusions suited to a flat-layout."""

    _EXCLUDE = (
        "ci",
        "bin",
        "doc",
        "docs",
        "documentation",
        "manpages",
        "news",
        "changelog",
        "test",
        "tests",
        "unit_test",
        "unit_tests",
        "example",
        "examples",
        "scripts",
        "tools",
        "util",
        "utils",
        "python",
        "build",
        "dist",
        "venv",
        "env",
        "requirements",
        # ---- Task runners / Build tools ----
        "tasks",  # invoke
        "fabfile",  # fabric
        "site_scons",  # SCons
        # ---- Other tools ----
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        # ---- Hidden directories/Private packages ----
        "[._]*",
    )

    # Each reserved name is excluded together with everything nested under it.
    DEFAULT_EXCLUDE = tuple(chain_iter((p, f"{p}.*") for p in _EXCLUDE))
    """Reserved package names"""

    @staticmethod
    def _looks_like_package(_path: _Path, package_name: str) -> bool:
        """Accept dotted names made of identifiers; the root component may
        alternatively end in ``-stubs``.
        """
        names = package_name.split('.')
        # Consider PEP 561
        root_pkg_is_valid = names[0].isidentifier() or names[0].endswith("-stubs")
        return root_pkg_is_valid and all(name.isidentifier() for name in names[1:])


class FlatLayoutModuleFinder(ModuleFinder):
    """Module finder with default exclusions suited to a flat-layout."""

    DEFAULT_EXCLUDE = (
        "setup",
        "conftest",
        "test",
        "tests",
        "example",
        "examples",
        "build",
        # ---- Task runners ----
        "toxfile",
        "noxfile",
        "pavement",
        "dodo",
        "tasks",
        "fabfile",
        # ---- Other tools ----
        "[Ss][Cc]onstruct",  # SCons
        "conanfile",  # Conan: C/C++ build tool
        "manage",  # Django
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        # ---- Hidden files/Private modules ----
        "[._]*",
    )
    """Reserved top-level module names"""


def _find_packages_within(root_pkg: str, pkg_dir: _Path) -> List[str]:
    """Return ``root_pkg`` followed by every package found under ``pkg_dir``,
    each one prefixed with ``root_pkg``.
    """
    nested = PEP420PackageFinder.find(pkg_dir)
    return [root_pkg] + [".".join((root_pkg, n)) for n in nested]


class ConfigDiscovery:
    """Fill-in metadata and options that can be automatically derived
    (from other metadata/options, the file system or conventions)
    """

    def __init__(self, distribution: "Distribution"):
        self.dist = distribution
        self._called = False
        self._disabled = False
        self._skip_ext_modules = False

    def _disable(self):
        """Internal API to disable automatic discovery"""
        self._disabled = True

    def _ignore_ext_modules(self):
        """Internal API to disregard ext_modules.

        Normally auto-discovery would not be triggered if ``ext_modules`` are set
        (this is done for backward compatibility with existing packages relying on
        ``setup.py`` or ``setup.cfg``). However, ``setuptools`` can call this function
        to ignore given ``ext_modules`` and proceed with the auto-discovery if
        ``packages`` and ``py_modules`` are not given (e.g. when using pyproject.toml
        metadata).
        """
        self._skip_ext_modules = True

    @property
    def _root_dir(self) -> _Path:
        """Project root: ``dist.src_root`` when set, else the current directory."""
        # The best is to wait until `src_root` is set in dist, before using _root_dir.
        return self.dist.src_root or os.curdir

    @property
    def _package_dir(self) -> Dict[str, str]:
        """``dist.package_dir``, or a new empty dict when it is ``None``."""
        if self.dist.package_dir is None:
            return {}
        return self.dist.package_dir

    def __call__(self, force=False, name=True, ignore_ext_modules=False):
        """Automatically discover missing configuration fields
        and modifies the given ``distribution`` object in-place.

        Note that by default this will only have an effect the first time the
        ``ConfigDiscovery`` object is called.

        To repeatedly invoke automatic discovery (e.g. when the project
        directory changes), please use ``force=True`` (or create a new
        ``ConfigDiscovery`` instance).
        """
        if force is False and (self._called or self._disabled):
            # Avoid overhead of multiple calls
            return

        self._analyse_package_layout(ignore_ext_modules)
        if name:
            self.analyse_name()  # depends on ``packages`` and ``py_modules``

        self._called = True

    def _explicitly_specified(self, ignore_ext_modules: bool) -> bool:
        """``True`` if the user has specified some form of package/module listing"""
        ignore_ext_modules = ignore_ext_modules or self._skip_ext_modules
        ext_modules = not (self.dist.ext_modules is None or ignore_ext_modules)
        # Coerce to ``bool`` so that the ``configuration`` object itself is
        # never leaked as the return value.
        return bool(
            self.dist.packages is not None
            or self.dist.py_modules is not None
            or ext_modules
            or getattr(self.dist, "configuration", None)
            # ^ Some projects use numpy.distutils.misc_util.Configuration
        )

    def _analyse_package_layout(self, ignore_ext_modules: bool) -> bool:
        """Try each supported layout in turn, stopping at the first that applies.

        Returns ``True`` without touching the distribution when the user has
        already specified packages/modules.
        """
        if self._explicitly_specified(ignore_ext_modules):
            # For backward compatibility, just try to find modules/packages
            # when nothing is given
            return True

        log.debug(
            "No `packages` or `py_modules` configuration, performing "
            "automatic discovery."
        )

        return (
            self._analyse_explicit_layout()
            or self._analyse_src_layout()
            # flat-layout is the trickiest for discovery so it should be last
            or self._analyse_flat_layout()
        )

    def _analyse_explicit_layout(self) -> bool:
        """The user can explicitly give a package layout via ``package_dir``"""
        package_dir = self._package_dir.copy()  # don't modify directly
        package_dir.pop("", None)  # This falls under the "src-layout" umbrella
        root_dir = self._root_dir

        if not package_dir:
            return False

        log.debug(f"`explicit-layout` detected -- analysing {package_dir}")
        pkgs = chain_iter(
            _find_packages_within(pkg, os.path.join(root_dir, parent_dir))
            for pkg, parent_dir in package_dir.items()
        )
        self.dist.packages = list(pkgs)
        log.debug(f"discovered packages -- {self.dist.packages}")
        return True

    def _analyse_src_layout(self) -> bool:
        """Try to find all packages or modules under the ``src`` directory
        (or anything pointed by ``package_dir[""]``).

        The "src-layout" is relatively safe for automatic discovery.
        We assume that everything within is meant to be included in the
        distribution.

        If ``package_dir[""]`` is not given, but the ``src`` directory exists,
        this function will set ``package_dir[""] = "src"``.
        """
        package_dir = self._package_dir
        src_dir = os.path.join(self._root_dir, package_dir.get("", "src"))
        if not os.path.isdir(src_dir):
            return False

        log.debug(f"`src-layout` detected -- analysing {src_dir}")
        package_dir.setdefault("", os.path.basename(src_dir))
        self.dist.package_dir = package_dir  # persist eventual modifications
        self.dist.packages = PEP420PackageFinder.find(src_dir)
        self.dist.py_modules = ModuleFinder.find(src_dir)
        log.debug(f"discovered packages -- {self.dist.packages}")
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        return True

    def _analyse_flat_layout(self) -> bool:
        """Try to find all packages and modules under the project root.

        Since the ``flat-layout`` is more dangerous in terms of accidentally including
        extra files/directories, this function is more conservative and will raise an
        error if multiple packages or modules are found.

        This assumes that multi-package dists are uncommon and refuses to support that
        use case in order to be able to prevent unintended errors.
        """
        log.debug(f"`flat-layout` detected -- analysing {self._root_dir}")
        return self._analyse_flat_packages() or self._analyse_flat_modules()

    def _analyse_flat_packages(self) -> bool:
        """Discover packages under the project root.

        Returns ``True`` if any top-level package (stubs and nested packages
        aside) was found; raises if more than one was.
        """
        self.dist.packages = FlatLayoutPackageFinder.find(self._root_dir)
        top_level = remove_nested_packages(remove_stubs(self.dist.packages))
        log.debug(f"discovered packages -- {self.dist.packages}")
        self._ensure_no_accidental_inclusion(top_level, "packages")
        return bool(top_level)

    def _analyse_flat_modules(self) -> bool:
        """Discover modules under the project root.

        Returns ``True`` if any module was found; raises if more than one was.
        """
        self.dist.py_modules = FlatLayoutModuleFinder.find(self._root_dir)
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        self._ensure_no_accidental_inclusion(self.dist.py_modules, "modules")
        return bool(self.dist.py_modules)

    def _ensure_no_accidental_inclusion(self, detected: List[str], kind: str):
        """Raise ``PackageDiscoveryError`` if ``detected`` has more than one item.

        ``kind`` ("packages" or "modules") is only used in the error message.
        """
        if len(detected) > 1:
            from inspect import cleandoc

            from setuptools.errors import PackageDiscoveryError

            msg = f"""Multiple top-level {kind} discovered in a flat-layout: {detected}.

            To avoid accidental inclusion of unwanted files or directories,
            setuptools will not proceed with this build.

            If you are trying to create a single distribution with multiple {kind}
            on purpose, you should not rely on automatic discovery.
            Instead, consider the following options:

            1. set up custom discovery (`find` directive with `include` or `exclude`)
            2. use a `src-layout`
            3. explicitly set `py_modules` or `packages` with a list of names

            To find more information, look for "package discovery" on setuptools docs.
            """
            raise PackageDiscoveryError(cleandoc(msg))

    def analyse_name(self):
        """The packages/modules are the essential contribution of the author.
        Therefore the name of the distribution can be derived from them.
        """
        if self.dist.metadata.name or self.dist.name:
            # get_name() is not reliable (can return "UNKNOWN")
            return None

        log.debug("No `name` configuration, performing automatic discovery")

        name = (
            self._find_name_single_package_or_module()
            or self._find_name_from_packages()
        )
        if name:
            self.dist.metadata.name = name

    def _find_name_single_package_or_module(self) -> Optional[str]:
        """Exactly one module or package"""
        for field in ('packages', 'py_modules'):
            # A missing or ``None`` field is treated as an empty listing.
            items = getattr(self.dist, field, None) or []
            if len(items) == 1:
                log.debug(f"Single module/package detected, name: {items[0]}")
                return items[0]

        return None

    def _find_name_from_packages(self) -> Optional[str]:
        """Try to find the root package that is not a PEP 420 namespace"""
        if not self.dist.packages:
            return None

        packages = remove_stubs(sorted(self.dist.packages, key=len))
        package_dir = self.dist.package_dir or {}

        parent_pkg = find_parent_package(packages, package_dir, self._root_dir)
        if parent_pkg:
            log.debug(f"Common parent package detected, name: {parent_pkg}")
            return parent_pkg

        log.warn("No parent package detected, impossible to derive `name`")
        return None


def remove_nested_packages(packages: List[str]) -> List[str]:
    """Remove nested packages from a list of packages.

    >>> remove_nested_packages(["a", "a.b1", "a.b2", "a.b1.c1"])
    ['a']
    >>> remove_nested_packages(["a", "b", "c.d", "c.d.e.f", "g.h", "a.a1"])
    ['a', 'b', 'c.d', 'g.h']
    """
    pkgs = sorted(packages, key=len)
    top_level = pkgs[:]
    size = len(pkgs)
    # Walk from the longest name to the shortest: removals then only ever
    # happen at or after the current position, so ``size - i - 1`` is still
    # the index of ``name`` inside ``top_level``.
    for i, name in enumerate(reversed(pkgs)):
        if any(name.startswith(f"{other}.") for other in top_level):
            top_level.pop(size - i - 1)

    return top_level


def remove_stubs(packages: List[str]) -> List[str]:
    """Remove type stubs (:pep:`561`) from a list of packages.

    >>> remove_stubs(["a", "a.b", "a-stubs", "a-stubs.b.c", "b", "c-stubs"])
    ['a', 'a.b', 'b']
    """
    return [pkg for pkg in packages if not pkg.split(".")[0].endswith("-stubs")]


def find_parent_package(
    packages: List[str], package_dir: Mapping[str, str], root_dir: _Path
) -> Optional[str]:
    """Find the parent package that is not a namespace."""
    packages = sorted(packages, key=len)
    common_ancestors = []
    for i, name in enumerate(packages):
        if not all(n.startswith(f"{name}.") for n in packages[i+1:]):
            # Since packages are sorted by length, this condition is able
            # to find a list of all common ancestors.
            # When there is divergence (e.g. multiple root packages)
            # the list will be empty
            break
        common_ancestors.append(name)

    # The first common ancestor that has an ``__init__.py`` on disk wins.
    for name in common_ancestors:
        pkg_path = find_package_path(name, package_dir, root_dir)
        init = os.path.join(pkg_path, "__init__.py")
        if os.path.isfile(init):
            return name

    return None


def find_package_path(
    name: str, package_dir: Mapping[str, str], root_dir: _Path
) -> str:
    """Given a package name, return the path where it should be found on
    disk, considering the ``package_dir`` option.

    >>> path = find_package_path("my.pkg", {"": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/my/pkg'

    >>> path = find_package_path("my.pkg", {"my": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/pkg'

    >>> path = find_package_path("my.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested'

    >>> path = find_package_path("other.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './other/pkg'
    """
    parts = name.split(".")
    for i in range(len(parts), 0, -1):
        # Look backwards, the most specific package_dir first
        partial_name = ".".join(parts[:i])
        if partial_name in package_dir:
            parent = package_dir[partial_name]
            return os.path.join(root_dir, parent, *parts[i:])

    # No specific entry matched: fall back to the root entry (``""``), if any.
    parent = package_dir.get("") or ""
    return os.path.join(root_dir, *parent.split("/"), *parts)


def construct_package_dir(packages: List[str], package_path: _Path) -> Dict[str, str]:
    """Build a ``package_dir`` mapping that places each top-level package in
    ``packages`` under ``package_path`` (values always use ``/`` as separator).
    """
    parent_pkgs = remove_nested_packages(packages)
    prefix = Path(package_path).parts
    return {pkg: "/".join([*prefix, *pkg.split(".")]) for pkg in parent_pkgs}