• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Automatic discovery of Python modules and packages (for inclusion in the
2distribution) and other config values.
3
4For the purposes of this module, the following nomenclature is used:
5
6- "src-layout": a directory representing a Python project that contains a "src"
7  folder. Everything under the "src" folder is meant to be included in the
8  distribution when packaging the project. Example::
9
10    .
11    ├── tox.ini
12    ├── pyproject.toml
13    └── src/
14        └── mypkg/
15            ├── __init__.py
16            ├── mymodule.py
17            └── my_data_file.txt
18
19- "flat-layout": a Python project that does not use "src-layout" but instead
  has a directory under the project root for each package::
21
22    .
23    ├── tox.ini
24    ├── pyproject.toml
25    └── mypkg/
26        ├── __init__.py
27        ├── mymodule.py
28        └── my_data_file.txt
29
- "single-module": a project that contains a single Python script directly under
31  the project root (no directory used)::
32
33    .
34    ├── tox.ini
35    ├── pyproject.toml
36    └── mymodule.py
37
38"""
39
40import itertools
41import os
42from fnmatch import fnmatchcase
43from glob import glob
44from pathlib import Path
45from typing import (
46    TYPE_CHECKING,
47    Callable,
48    Dict,
49    Iterable,
50    Iterator,
51    List,
52    Mapping,
53    Optional,
54    Tuple,
55    Union
56)
57
58import _distutils_hack.override  # noqa: F401
59
60from distutils import log
61from distutils.util import convert_path
62
# Anything accepted as a filesystem path: a plain string or an ``os.PathLike``.
_Path = Union[str, os.PathLike]
# Predicate over an item name, as returned by ``_Finder._build_filter``.
_Filter = Callable[[str], bool]
# Iterator of item names, as returned by the ``_find_iter`` implementations.
StrIter = Iterator[str]

# Shorthand for flattening an iterable of iterables into a single stream.
chain_iter = itertools.chain.from_iterable
68
69if TYPE_CHECKING:
70    from setuptools import Distribution  # noqa
71
72
def _valid_name(path: _Path) -> bool:
    """Tell whether the last component of ``path`` is a valid Python identifier.

    Names that are not identifiers cannot be imported directly, so callers
    use this check to ignore them.
    """
    _parent, leaf = os.path.split(path)
    return leaf.isidentifier()
76
77
class _Finder:
    """Common machinery shared by the module and package finders"""

    ALWAYS_EXCLUDE: Tuple[str, ...] = ()
    DEFAULT_EXCLUDE: Tuple[str, ...] = ()

    @classmethod
    def find(
        cls,
        where: _Path = '.',
        exclude: Iterable[str] = (),
        include: Iterable[str] = ('*',)
    ) -> List[str]:
        """List every Python item found inside the directory ``where``.

        What counts as an "item" (package or module) is decided by the
        concrete finder, through its ``_find_iter`` implementation.

        ``where`` is the root directory of the search. It should be given as
        a "cross-platform" (i.e. URL-style) path; it is converted to the
        local path syntax before searching.

        ``exclude`` holds the names to leave out; ``'*'`` works as a wildcard.
        When finding packages, ``'foo.*'`` leaves out all subpackages of
        ``foo`` (but not ``foo`` itself). When empty, the finder's
        ``DEFAULT_EXCLUDE`` is used instead. The patterns in
        ``ALWAYS_EXCLUDE`` are applied in every case.

        ``include`` holds the names to keep, with the same shell-style
        wildcard support as ``exclude``. The default (``'*'``) keeps
        everything that was found.
        """
        if not exclude:
            exclude = cls.DEFAULT_EXCLUDE

        search_root = convert_path(str(where))
        rejected = cls._build_filter(*cls.ALWAYS_EXCLUDE, *exclude)
        accepted = cls._build_filter(*include)
        return list(cls._find_iter(search_root, rejected, accepted))

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """Yield the items found in ``where``; subclasses must override this."""
        raise NotImplementedError

    @staticmethod
    def _build_filter(*patterns: str) -> _Filter:
        """
        Build a predicate that is true only for names matching at least one
        of ``patterns`` (case-sensitive, shell-style wildcards).
        """

        def matches_any(name: str) -> bool:
            for pattern in patterns:
                if fnmatchcase(name, pattern):
                    return True
            return False

        return matches_any
130
131
class PackageFinder(_Finder):
    """
    Produce the list of all Python packages found inside a directory
    """

    ALWAYS_EXCLUDE = ("ez_setup", "*__pycache__")

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """
        Yield the dotted name of every package under 'where' that is accepted
        by the 'include' filter and not rejected by the 'exclude' filter.
        """
        for current, subdirs, _files in os.walk(str(where), followlinks=True):
            # Take a snapshot of the sub-directories and empty the live list:
            # ``os.walk`` only descends into what is appended back below.
            candidates = list(subdirs)
            del subdirs[:]

            for entry in candidates:
                entry_path = os.path.join(current, entry)
                relative = os.path.relpath(entry_path, where)
                dotted = relative.replace(os.path.sep, '.')

                # Directory trees that are not valid packages are pruned
                is_package = (
                    '.' not in entry
                    and cls._looks_like_package(entry_path, dotted)
                )
                if not is_package:
                    continue

                # Report the package only when the filters allow it
                wanted = include(dotted) and not exclude(dotted)
                if wanted:
                    yield dotted

                # Descend regardless of the filters: nested packages may
                # still be wanted even when their parent was excluded.
                subdirs.append(entry)

    @staticmethod
    def _looks_like_package(path: _Path, _package_name: str) -> bool:
        """Does the directory contain an ``__init__.py`` file?"""
        init_file = os.path.join(path, '__init__.py')
        return os.path.isfile(init_file)
171
172
class PEP420PackageFinder(PackageFinder):
    """Package finder that does not require an ``__init__.py`` file.

    Every directory that reaches ``_looks_like_package`` is accepted.
    """

    @staticmethod
    def _looks_like_package(_path: _Path, _package_name: str) -> bool:
        """Accept any directory, ignoring both arguments."""
        return True
177
178
class ModuleFinder(_Finder):
    """Find isolated Python modules.
    Only ``*.py`` files sitting directly in the searched directory are
    considered; subdirectories are **not** visited.
    """

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """Yield the name of each accepted ``*.py`` file directly in ``where``."""
        pattern = os.path.join(where, "*.py")
        for script in glob(pattern):
            filename = os.path.basename(script)
            module = os.path.splitext(filename)[0]

            if (
                cls._looks_like_module(module)
                and include(module)
                and not exclude(module)
            ):
                yield module

    # A module is only considered when its name is a valid identifier
    _looks_like_module = staticmethod(_valid_name)
196
197
198# We have to be extra careful in the case of flat layout to not include files
199# and directories not meant for distribution (e.g. tool-related)
200
201
class FlatLayoutPackageFinder(PEP420PackageFinder):
    """Package finder for flat-layout projects, with a list of reserved names."""

    _EXCLUDE = (
        "ci",
        "bin",
        "doc",
        "docs",
        "documentation",
        "manpages",
        "news",
        "changelog",
        "test",
        "tests",
        "unit_test",
        "unit_tests",
        "example",
        "examples",
        "scripts",
        "tools",
        "util",
        "utils",
        "python",
        "build",
        "dist",
        "venv",
        "env",
        "requirements",
        # ---- Task runners / Build tools ----
        "tasks",  # invoke
        "fabfile",  # fabric
        "site_scons",  # SCons
        # ---- Other tools ----
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        # ---- Hidden directories/Private packages ----
        "[._]*",
    )

    # Each reserved name is followed by the pattern covering its subpackages
    DEFAULT_EXCLUDE = tuple(
        pattern
        for reserved in _EXCLUDE
        for pattern in (reserved, f"{reserved}.*")
    )
    """Reserved package names"""

    @staticmethod
    def _looks_like_package(_path: _Path, package_name: str) -> bool:
        """Check that every component of the dotted name is an identifier.

        The top-level component may alternatively end with ``-stubs``.
        """
        root, *nested = package_name.split('.')
        # Consider PEP 561
        if not (root.isidentifier() or root.endswith("-stubs")):
            return False
        return all(part.isidentifier() for part in nested)
250
251
class FlatLayoutModuleFinder(ModuleFinder):
    """Module finder whose default exclusions are the reserved names below."""

    DEFAULT_EXCLUDE = (
        "setup",
        "conftest",
        "test",
        "tests",
        "example",
        "examples",
        "build",
        # ---- Task runners ----
        "toxfile",
        "noxfile",
        "pavement",
        "dodo",
        "tasks",
        "fabfile",
        # ---- Other tools ----
        "[Ss][Cc]onstruct",  # SCons
        "conanfile",  # Conan: C/C++ build tool
        "manage",  # Django
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        # ---- Hidden files/Private modules ----
        "[._]*",
    )
    """Reserved top-level module names"""
280
281
def _find_packages_within(root_pkg: str, pkg_dir: _Path) -> List[str]:
    """List ``root_pkg`` followed by every package found under ``pkg_dir``.

    Nested packages are found with ``PEP420PackageFinder`` and reported
    with ``root_pkg`` prepended to their dotted name.
    """
    subpackages = PEP420PackageFinder.find(pkg_dir)
    result = [root_pkg]
    for sub in subpackages:
        result.append(".".join((root_pkg, sub)))
    return result
285
286
class ConfigDiscovery:
    """Fill-in metadata and options that can be automatically derived
    (from other metadata/options, the file system or conventions)

    Instances are callable. Calling one analyses the package layout and,
    unless ``name=False`` is given, also tries to derive the distribution
    name. The wrapped ``distribution`` object is modified in-place.
    """

    def __init__(self, distribution: "Distribution"):
        self.dist = distribution
        self._called = False  # becomes True once a call has completed
        self._disabled = False  # set by ``_disable``
        self._skip_ext_modules = False  # set by ``_ignore_ext_modules``

    def _disable(self):
        """Internal API to disable automatic discovery"""
        self._disabled = True

    def _ignore_ext_modules(self):
        """Internal API to disregard ext_modules.

        Normally auto-discovery would not be triggered if ``ext_modules`` are set
        (this is done for backward compatibility with existing packages relying on
        ``setup.py`` or ``setup.cfg``). However, ``setuptools`` can call this function
        to ignore given ``ext_modules`` and proceed with the auto-discovery if
        ``packages`` and ``py_modules`` are not given (e.g. when using pyproject.toml
        metadata).
        """
        self._skip_ext_modules = True

    @property
    def _root_dir(self) -> _Path:
        """``dist.src_root`` when it is set, the current directory otherwise."""
        # The best is to wait until `src_root` is set in dist, before using _root_dir.
        return self.dist.src_root or os.curdir

    @property
    def _package_dir(self) -> Dict[str, str]:
        """``dist.package_dir``, or a new empty dict when it is ``None``."""
        if self.dist.package_dir is None:
            return {}
        return self.dist.package_dir

    def __call__(self, force=False, name=True, ignore_ext_modules=False):
        """Automatically discover missing configuration fields
        and modifies the given ``distribution`` object in-place.

        Note that by default this will only have an effect the first time the
        ``ConfigDiscovery`` object is called.

        To repeatedly invoke automatic discovery (e.g. when the project
        directory changes), please use ``force=True`` (or create a new
        ``ConfigDiscovery`` instance).

        ``name`` controls whether the distribution name is also derived, and
        ``ignore_ext_modules`` makes the analysis disregard ``ext_modules``
        when deciding if the user already listed packages/modules.
        """
        if force is False and (self._called or self._disabled):
            # Avoid overhead of multiple calls
            return

        self._analyse_package_layout(ignore_ext_modules)
        if name:
            self.analyse_name()  # depends on ``packages`` and ``py_modules``

        self._called = True

    def _explicitly_specified(self, ignore_ext_modules: bool) -> bool:
        """``True`` if the user has specified some form of package/module listing

        ``ext_modules`` only count when neither the ``ignore_ext_modules``
        argument nor ``_ignore_ext_modules()`` asked to disregard them.
        The result is always a real ``bool``, never the raw ``configuration``
        attribute of the distribution.
        """
        ignore_ext_modules = ignore_ext_modules or self._skip_ext_modules
        ext_modules = not (self.dist.ext_modules is None or ignore_ext_modules)
        return (
            self.dist.packages is not None
            or self.dist.py_modules is not None
            or ext_modules
            # Some projects use numpy.distutils.misc_util.Configuration
            or bool(getattr(self.dist, "configuration", None))
        )

    def _analyse_package_layout(self, ignore_ext_modules: bool) -> bool:
        """Run the layout analysers in order, stopping at the first success.

        Returns ``True`` without touching the distribution when packages or
        modules were explicitly specified. Otherwise returns whether any of
        the explicit-, src- or flat-layout analysers succeeded.
        """
        if self._explicitly_specified(ignore_ext_modules):
            # For backward compatibility, just try to find modules/packages
            # when nothing is given
            return True

        log.debug(
            "No `packages` or `py_modules` configuration, performing "
            "automatic discovery."
        )

        return (
            self._analyse_explicit_layout()
            or self._analyse_src_layout()
            # flat-layout is the trickiest for discovery so it should be last
            or self._analyse_flat_layout()
        )

    def _analyse_explicit_layout(self) -> bool:
        """The user can explicitly give a package layout via ``package_dir``

        Returns ``False`` when ``package_dir`` has no entry other than ``""``.
        Otherwise sets ``dist.packages`` and returns ``True``.
        """
        package_dir = self._package_dir.copy()  # don't modify directly
        package_dir.pop("", None)  # This falls under the "src-layout" umbrella
        root_dir = self._root_dir

        if not package_dir:
            return False

        log.debug(f"`explicit-layout` detected -- analysing {package_dir}")
        pkgs = chain_iter(
            _find_packages_within(pkg, os.path.join(root_dir, parent_dir))
            for pkg, parent_dir in package_dir.items()
        )
        self.dist.packages = list(pkgs)
        log.debug(f"discovered packages -- {self.dist.packages}")
        return True

    def _analyse_src_layout(self) -> bool:
        """Try to find all packages or modules under the ``src`` directory
        (or anything pointed by ``package_dir[""]``).

        The "src-layout" is relatively safe for automatic discovery.
        We assume that everything within is meant to be included in the
        distribution.

        If ``package_dir[""]`` is not given, but the ``src`` directory exists,
        this function will set ``package_dir[""] = "src"``.

        Returns ``False`` when the directory does not exist.
        """
        package_dir = self._package_dir
        src_dir = os.path.join(self._root_dir, package_dir.get("", "src"))
        if not os.path.isdir(src_dir):
            return False

        log.debug(f"`src-layout` detected -- analysing {src_dir}")
        package_dir.setdefault("", os.path.basename(src_dir))
        self.dist.package_dir = package_dir  # persist eventual modifications
        self.dist.packages = PEP420PackageFinder.find(src_dir)
        self.dist.py_modules = ModuleFinder.find(src_dir)
        log.debug(f"discovered packages -- {self.dist.packages}")
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        return True

    def _analyse_flat_layout(self) -> bool:
        """Try to find all packages and modules under the project root.

        Since the ``flat-layout`` is more dangerous in terms of accidentally including
        extra files/directories, this function is more conservative and will raise an
        error if multiple packages or modules are found.

        This assumes that multi-package dists are uncommon and refuse to support that
        use case in order to be able to prevent unintended errors.
        """
        log.debug(f"`flat-layout` detected -- analysing {self._root_dir}")
        return self._analyse_flat_packages() or self._analyse_flat_modules()

    def _analyse_flat_packages(self) -> bool:
        """Set ``dist.packages`` from the project root.

        Stubs and nested packages are left out of the multiple-packages check.
        Returns whether any top-level package remains after that filtering.
        """
        self.dist.packages = FlatLayoutPackageFinder.find(self._root_dir)
        top_level = remove_nested_packages(remove_stubs(self.dist.packages))
        log.debug(f"discovered packages -- {self.dist.packages}")
        self._ensure_no_accidental_inclusion(top_level, "packages")
        return bool(top_level)

    def _analyse_flat_modules(self) -> bool:
        """Set ``dist.py_modules`` from the project root.

        Returns whether any module was found.
        """
        self.dist.py_modules = FlatLayoutModuleFinder.find(self._root_dir)
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        self._ensure_no_accidental_inclusion(self.dist.py_modules, "modules")
        return bool(self.dist.py_modules)

    def _ensure_no_accidental_inclusion(self, detected: List[str], kind: str):
        """Raise ``PackageDiscoveryError`` when more than one item was detected.

        ``kind`` ("packages" or "modules") is only used in the error message.
        """
        if len(detected) > 1:
            from inspect import cleandoc

            from setuptools.errors import PackageDiscoveryError

            msg = f"""Multiple top-level {kind} discovered in a flat-layout: {detected}.

            To avoid accidental inclusion of unwanted files or directories,
            setuptools will not proceed with this build.

            If you are trying to create a single distribution with multiple {kind}
            on purpose, you should not rely on automatic discovery.
            Instead, consider the following options:

            1. set up custom discovery (`find` directive with `include` or `exclude`)
            2. use a `src-layout`
            3. explicitly set `py_modules` or `packages` with a list of names

            To find more information, look for "package discovery" on setuptools docs.
            """
            raise PackageDiscoveryError(cleandoc(msg))

    def analyse_name(self):
        """The packages/modules are the essential contribution of the author.
        Therefore the name of the distribution can be derived from them.

        Nothing is done when a name is already set. Otherwise
        ``dist.metadata.name`` is filled in if a name could be derived.
        """
        if self.dist.metadata.name or self.dist.name:
            # get_name() is not reliable (can return "UNKNOWN")
            return None

        log.debug("No `name` configuration, performing automatic discovery")

        name = (
            self._find_name_single_package_or_module()
            or self._find_name_from_packages()
        )
        if name:
            self.dist.metadata.name = name

    def _find_name_single_package_or_module(self) -> Optional[str]:
        """Exactly one module or package

        ``packages`` is checked before ``py_modules``. Returns ``None`` when
        neither field holds exactly one item.
        """
        for field in ('packages', 'py_modules'):
            items = getattr(self.dist, field, None) or []
            if len(items) == 1:
                log.debug(f"Single module/package detected, name: {items[0]}")
                return items[0]

        return None

    def _find_name_from_packages(self) -> Optional[str]:
        """Try to find the root package that is not a PEP 420 namespace

        Returns ``None`` when there are no packages or when no common parent
        package could be found (a warning is logged in the latter case).
        """
        if not self.dist.packages:
            return None

        packages = remove_stubs(sorted(self.dist.packages, key=len))
        package_dir = self.dist.package_dir or {}

        parent_pkg = find_parent_package(packages, package_dir, self._root_dir)
        if parent_pkg:
            log.debug(f"Common parent package detected, name: {parent_pkg}")
            return parent_pkg

        log.warn("No parent package detected, impossible to derive `name`")
        return None
510
511
def remove_nested_packages(packages: List[str]) -> List[str]:
    """Keep only the packages that are not nested inside another listed one.

    The result is ordered by name length (shortest first).

    >>> remove_nested_packages(["a", "a.b1", "a.b2", "a.b1.c1"])
    ['a']
    >>> remove_nested_packages(["a", "b", "c.d", "c.d.e.f", "g.h", "a.a1"])
    ['a', 'b', 'c.d', 'g.h']
    """
    by_length = sorted(packages, key=len)

    def has_listed_parent(name: str) -> bool:
        # A package is nested when some listed name, followed by a dot,
        # is a prefix of its own dotted name.
        return any(name.startswith(f"{other}.") for other in by_length)

    return [name for name in by_length if not has_listed_parent(name)]
528
529
def remove_stubs(packages: List[str]) -> List[str]:
    """Drop type-stub packages (:pep:`561`) from a list of packages.

    A package is dropped when its top-level name ends with ``-stubs``.

    >>> remove_stubs(["a", "a.b", "a-stubs", "a-stubs.b.c", "b", "c-stubs"])
    ['a', 'a.b', 'b']
    """
    kept = []
    for pkg in packages:
        top_level, _sep, _rest = pkg.partition(".")
        if not top_level.endswith("-stubs"):
            kept.append(pkg)
    return kept
537
538
def find_parent_package(
    packages: List[str], package_dir: Mapping[str, str], root_dir: _Path
) -> Optional[str]:
    """Find the parent package that is not a namespace.

    Returns the shortest common ancestor of ``packages`` whose directory
    holds an ``__init__.py`` file, or ``None`` when there is none.
    """
    ordered = sorted(packages, key=len)
    ancestors = []
    for position, candidate in enumerate(ordered, start=1):
        prefix = f"{candidate}."
        # With names sorted by length, a common ancestor must prefix every
        # name that follows it. The first divergence (e.g. multiple root
        # packages) ends the search, possibly leaving the list empty.
        if any(not later.startswith(prefix) for later in ordered[position:]):
            break
        ancestors.append(candidate)

    for candidate in ancestors:
        location = find_package_path(candidate, package_dir, root_dir)
        if os.path.isfile(os.path.join(location, "__init__.py")):
            return candidate

    return None
561
562
def find_package_path(
    name: str, package_dir: Mapping[str, str], root_dir: _Path
) -> str:
    """Compute where a package should live on disk, honouring the
    ``package_dir`` option.

    >>> path = find_package_path("my.pkg", {"": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/my/pkg'

    >>> path = find_package_path("my.pkg", {"my": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/pkg'

    >>> path = find_package_path("my.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested'

    >>> path = find_package_path("other.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './other/pkg'
    """
    segments = name.split(".")
    depth = len(segments)
    while depth > 0:
        # Try the longest (most specific) dotted prefix first
        prefix = ".".join(segments[:depth])
        if prefix in package_dir:
            return os.path.join(root_dir, package_dir[prefix], *segments[depth:])
        depth -= 1

    # No prefix matched: fall back to the ``""`` entry, if any
    fallback = package_dir.get("") or ""
    return os.path.join(root_dir, *fallback.split("/"), *segments)
595
596
def construct_package_dir(packages: List[str], package_path: _Path) -> Dict[str, str]:
    """Map each non-nested package to its ``/``-separated directory.

    The directory is built from the parts of ``package_path`` followed by
    the components of the dotted package name.
    """
    top_level = remove_nested_packages(packages)
    base = Path(package_path).parts
    mapping = {}
    for pkg in top_level:
        mapping[pkg] = "/".join((*base, *pkg.split(".")))
    return mapping
601