• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Automatic discovery of Python modules and packages (for inclusion in the
2distribution) and other config values.
3
4For the purposes of this module, the following nomenclature is used:
5
6- "src-layout": a directory representing a Python project that contains a "src"
7  folder. Everything under the "src" folder is meant to be included in the
8  distribution when packaging the project. Example::
9
10    .
11    ├── tox.ini
12    ├── pyproject.toml
13    └── src/
14        └── mypkg/
15            ├── __init__.py
16            ├── mymodule.py
17            └── my_data_file.txt
18
- "flat-layout": a Python project that does not use "src-layout" but instead
  has a directory under the project root for each package::
21
22    .
23    ├── tox.ini
24    ├── pyproject.toml
25    └── mypkg/
26        ├── __init__.py
27        ├── mymodule.py
28        └── my_data_file.txt
29
- "single-module": a project that contains a single Python script directly
  under the project root (no directory used)::
32
33    .
34    ├── tox.ini
35    ├── pyproject.toml
36    └── mymodule.py
37
38"""
39
40import itertools
41import os
42from fnmatch import fnmatchcase
43from glob import glob
44from pathlib import Path
45from typing import TYPE_CHECKING
46from typing import Callable, Dict, Iterator, Iterable, List, Optional, Tuple, Union
47
48import _distutils_hack.override  # noqa: F401
49
50from distutils import log
51from distutils.util import convert_path
52
# Anything accepted as a filesystem location: a plain string or an ``os.PathLike``.
_Path = Union[str, os.PathLike]
# Predicate that receives a name (string) and answers with a boolean.
_Filter = Callable[[str], bool]
# Iterator that produces names (strings).
StrIter = Iterator[str]

# Shorthand for flattening an iterable of iterables by one level.
chain_iter = itertools.chain.from_iterable
58
59if TYPE_CHECKING:
60    from setuptools import Distribution  # noqa
61
62
def _valid_name(path: _Path) -> bool:
    """Tell whether the last component of ``path`` is a valid identifier.

    Names that are not identifiers cannot be imported directly, so callers
    use this check to ignore them.
    """
    _parent, last_component = os.path.split(path)
    return last_component.isidentifier()
66
67
class _Finder:
    """Common machinery shared by the module and package finders.

    Subclasses implement ``_find_iter`` and may customise the two class-level
    tuples of exclusion patterns.
    """

    # Patterns that are excluded no matter what the caller asks for.
    ALWAYS_EXCLUDE: Tuple[str, ...] = ()
    # Patterns used only when the caller gives no ``exclude`` of its own.
    DEFAULT_EXCLUDE: Tuple[str, ...] = ()

    @classmethod
    def find(
        cls,
        where: _Path = '.',
        exclude: Iterable[str] = (),
        include: Iterable[str] = ('*',)
    ) -> List[str]:
        """List the Python items found inside the directory ``where``.

        Whether "items" means packages or modules is decided by the concrete
        finder (i.e. by its ``_find_iter`` implementation).

        ``where`` is the root directory of the search. It is expected to be a
        "cross-platform" (i.e. URL-style) path and is converted to the local
        path syntax with ``convert_path`` before being used.

        ``exclude`` holds the names to be left out; ``*`` can be used as a
        wildcard. When finding packages, ``'foo.*'`` leaves out every
        subpackage of ``foo`` (but not ``foo`` itself). When ``exclude`` is
        empty, ``DEFAULT_EXCLUDE`` is used in its place. The patterns in
        ``ALWAYS_EXCLUDE`` are added in every case.

        ``include`` holds the names to be considered and accepts the same
        shell-style wildcards as ``exclude``. The default, ``('*',)``,
        matches every name.
        """
        effective_exclude = exclude or cls.DEFAULT_EXCLUDE
        search_root = convert_path(str(where))
        is_excluded = cls._build_filter(*cls.ALWAYS_EXCLUDE, *effective_exclude)
        is_included = cls._build_filter(*include)
        return list(cls._find_iter(search_root, is_excluded, is_included))

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """Yield the names found in ``where``; concrete finders must override."""
        raise NotImplementedError

    @staticmethod
    def _build_filter(*patterns: str) -> _Filter:
        """
        Create a predicate out of ``patterns``: the returned callable is true
        only for names matching (case-sensitively) at least one pattern.
        """

        def matches_any(name: str) -> bool:
            return any(fnmatchcase(name, pattern) for pattern in patterns)

        return matches_any
120
121
class PackageFinder(_Finder):
    """
    Produce the list of Python packages that live under a directory
    """

    ALWAYS_EXCLUDE = ("ez_setup", "*__pycache__")

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """
        Walk ``where`` and yield the dotted name of every package accepted by
        ``include`` and not rejected by ``exclude``.
        """
        for current, subdirs, _files in os.walk(str(where), followlinks=True):
            # Snapshot the children and reset the live list in place: only the
            # entries appended back to ``subdirs`` are visited by ``os.walk``.
            candidates = list(subdirs)
            del subdirs[:]

            for entry in candidates:
                location = os.path.join(current, entry)
                dotted_name = os.path.relpath(location, where).replace(
                    os.path.sep, '.'
                )

                # Directory trees that are not valid packages are dropped
                # entirely (they are never appended back to ``subdirs``).
                if '.' in entry:
                    continue
                if not cls._looks_like_package(location, dotted_name):
                    continue

                if include(dotted_name) and not exclude(dotted_name):
                    yield dotted_name

                # Descend regardless of the filters: a filtered-out parent
                # may still contain packages that should be reported.
                subdirs.append(entry)

    @staticmethod
    def _looks_like_package(path: _Path, _package_name: str) -> bool:
        """A directory is taken as a package when it holds an ``__init__.py`` file"""
        init_file = os.path.join(path, '__init__.py')
        return os.path.isfile(init_file)
161
162
class PEP420PackageFinder(PackageFinder):
    """Package finder that does not require ``__init__.py`` to be present."""

    @staticmethod
    def _looks_like_package(_path: _Path, _package_name: str) -> bool:
        """Accept every directory handed over by the directory walk."""
        return True
167
168
class ModuleFinder(_Finder):
    """Find isolated Python modules.
    Only ``*.py`` files directly inside the given directory are considered;
    subdirectories are **not** searched.
    """

    @classmethod
    def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
        """Yield the name (file name minus extension) of each accepted module."""
        pattern = os.path.join(where, "*.py")
        for script in glob(pattern):
            stem = os.path.splitext(os.path.basename(script))[0]
            if cls._looks_like_module(stem) and include(stem) and not exclude(stem):
                yield stem

    # A module is only importable when its name is a valid identifier.
    _looks_like_module = staticmethod(_valid_name)
186
187
188# We have to be extra careful in the case of flat layout to not include files
189# and directories not meant for distribution (e.g. tool-related)
190
191
class FlatLayoutPackageFinder(PEP420PackageFinder):
    """Package finder for flat layouts, with a list of reserved names that are
    excluded by default.
    """

    _EXCLUDE = (
        "ci",
        "bin",
        "doc",
        "docs",
        "documentation",
        "manpages",
        "news",
        "changelog",
        "test",
        "tests",
        "unit_test",
        "unit_tests",
        "example",
        "examples",
        "scripts",
        "tools",
        "util",
        "utils",
        "python",
        "build",
        "dist",
        "venv",
        "env",
        "requirements",
        # ---- Task runners / Build tools ----
        "tasks",  # invoke
        "fabfile",  # fabric
        "site_scons",  # SCons
        # ---- Other tools ----
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        # ---- Hidden directories/Private packages ----
        "[._]*",
    )

    #: Reserved package names: each entry of ``_EXCLUDE`` followed by the
    #: pattern that matches everything nested under it.
    DEFAULT_EXCLUDE = tuple(
        pattern for name in _EXCLUDE for pattern in (name, f"{name}.*")
    )

    @staticmethod
    def _looks_like_package(_path: _Path, package_name: str) -> bool:
        """Check that every component of the dotted ``package_name`` is a valid
        identifier, tolerating a ``-stubs`` suffix on the first component.
        """
        root, *nested = package_name.split('.')
        # Consider PEP 561
        if not (root.isidentifier() or root.endswith("-stubs")):
            return False
        return all(part.isidentifier() for part in nested)
240
241
class FlatLayoutModuleFinder(ModuleFinder):
    """Module finder for flat layouts, with a list of reserved top-level
    module names that are excluded by default.
    """

    #: Reserved top-level module names
    DEFAULT_EXCLUDE = (
        "setup",
        "conftest",
        "test",
        "tests",
        "example",
        "examples",
        "build",
        # ---- Task runners ----
        "toxfile",
        "noxfile",
        "pavement",
        "dodo",
        "tasks",
        "fabfile",
        # ---- Other tools ----
        "[Ss][Cc]onstruct",  # SCons
        "conanfile",  # Conan: C/C++ build tool
        "manage",  # Django
        "benchmark",
        "benchmarks",
        "exercise",
        "exercises",
        # ---- Hidden files/Private modules ----
        "[._]*",
    )
270
271
def _find_packages_within(root_pkg: str, pkg_dir: _Path) -> List[str]:
    """Return ``root_pkg`` followed by every package found inside ``pkg_dir``,
    each one qualified with the ``root_pkg`` prefix.
    """
    qualified = [root_pkg]
    for nested_name in PEP420PackageFinder.find(pkg_dir):
        qualified.append(".".join((root_pkg, nested_name)))
    return qualified
275
276
class ConfigDiscovery:
    """Fill-in metadata and options that can be automatically derived
    (from other metadata/options, the file system or conventions)
    """

    def __init__(self, distribution: "Distribution"):
        self.dist = distribution
        self._called = False  # set once a discovery run completes
        self._disabled = False
        self._skip_ext_modules = False

    def _disable(self):
        """Internal API to disable automatic discovery"""
        self._disabled = True

    def _ignore_ext_modules(self):
        """Internal API to disregard ext_modules.

        Normally auto-discovery would not be triggered if ``ext_modules`` are set
        (this is done for backward compatibility with existing packages relying on
        ``setup.py`` or ``setup.cfg``). However, ``setuptools`` can call this function
        to ignore given ``ext_modules`` and proceed with the auto-discovery if
        ``packages`` and ``py_modules`` are not given (e.g. when using pyproject.toml
        metadata).
        """
        self._skip_ext_modules = True

    @property
    def _root_dir(self) -> _Path:
        """``dist.src_root`` when set, otherwise the current directory."""
        # The best is to wait until `src_root` is set in dist, before using _root_dir.
        return self.dist.src_root or os.curdir

    @property
    def _package_dir(self) -> Dict[str, str]:
        """``dist.package_dir``, or a new empty dict when it is ``None``."""
        if self.dist.package_dir is None:
            return {}
        return self.dist.package_dir

    def __call__(self, force=False, name=True, ignore_ext_modules=False):
        """Automatically discover missing configuration fields
        and modifies the given ``distribution`` object in-place.

        Note that by default this will only have an effect the first time the
        ``ConfigDiscovery`` object is called.

        To repeatedly invoke automatic discovery (e.g. when the project
        directory changes), please use ``force=True`` (or create a new
        ``ConfigDiscovery`` instance).

        :param force: run even if discovery already ran or was disabled.
        :param name: when true, also try to derive the distribution name.
        :param ignore_ext_modules: when true, ``ext_modules`` does not count
            as an explicit package/module listing.
        """
        # Plain truthiness (instead of ``is False``) so that falsy values such
        # as ``0`` or ``None`` are not mistaken for a request to force a re-run.
        if not force and (self._called or self._disabled):
            # Avoid overhead of multiple calls
            return

        self._analyse_package_layout(ignore_ext_modules)
        if name:
            self.analyse_name()  # depends on ``packages`` and ``py_modules``

        self._called = True

    def _explicitly_specified(self, ignore_ext_modules: bool) -> bool:
        """``True`` if the user has specified some form of package/module listing"""
        ignore_ext_modules = ignore_ext_modules or self._skip_ext_modules
        has_ext_modules = (
            self.dist.ext_modules is not None and not ignore_ext_modules
        )
        # Some projects use numpy.distutils.misc_util.Configuration.
        # Coerce to ``bool`` so the configuration object itself is never
        # returned to the caller.
        has_configuration = bool(getattr(self.dist, "configuration", None))
        return (
            self.dist.packages is not None
            or self.dist.py_modules is not None
            or has_ext_modules
            or has_configuration
        )

    def _analyse_package_layout(self, ignore_ext_modules: bool) -> bool:
        """Fill ``packages``/``py_modules`` unless a listing was given explicitly.

        The layouts are tried in order (explicit, src, flat) and the first one
        that succeeds wins.
        """
        if self._explicitly_specified(ignore_ext_modules):
            # For backward compatibility, just try to find modules/packages
            # when nothing is given
            return True

        log.debug(
            "No `packages` or `py_modules` configuration, performing "
            "automatic discovery."
        )

        return (
            self._analyse_explicit_layout()
            or self._analyse_src_layout()
            # flat-layout is the trickiest for discovery so it should be last
            or self._analyse_flat_layout()
        )

    def _analyse_explicit_layout(self) -> bool:
        """The user can explicitly give a package layout via ``package_dir``"""
        package_dir = self._package_dir.copy()  # don't modify directly
        package_dir.pop("", None)  # This falls under the "src-layout" umbrella
        root_dir = self._root_dir

        if not package_dir:
            return False

        log.debug(f"`explicit-layout` detected -- analysing {package_dir}")
        pkgs = chain_iter(
            _find_packages_within(pkg, os.path.join(root_dir, parent_dir))
            for pkg, parent_dir in package_dir.items()
        )
        self.dist.packages = list(pkgs)
        log.debug(f"discovered packages -- {self.dist.packages}")
        return True

    def _analyse_src_layout(self) -> bool:
        """Try to find all packages or modules under the ``src`` directory
        (or anything pointed by ``package_dir[""]``).

        The "src-layout" is relatively safe for automatic discovery.
        We assume that everything within is meant to be included in the
        distribution.

        If ``package_dir[""]`` is not given, but the ``src`` directory exists,
        this function will set ``package_dir[""] = "src"``.
        """
        package_dir = self._package_dir
        src_dir = os.path.join(self._root_dir, package_dir.get("", "src"))
        if not os.path.isdir(src_dir):
            return False

        log.debug(f"`src-layout` detected -- analysing {src_dir}")
        package_dir.setdefault("", os.path.basename(src_dir))
        self.dist.package_dir = package_dir  # persist eventual modifications
        self.dist.packages = PEP420PackageFinder.find(src_dir)
        self.dist.py_modules = ModuleFinder.find(src_dir)
        log.debug(f"discovered packages -- {self.dist.packages}")
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        return True

    def _analyse_flat_layout(self) -> bool:
        """Try to find all packages and modules under the project root.

        Since the ``flat-layout`` is more dangerous in terms of accidentally including
        extra files/directories, this function is more conservative and will raise an
        error if multiple packages or modules are found.

        This assumes that multi-package dists are uncommon and refuse to support that
        use case in order to be able to prevent unintended errors.
        """
        log.debug(f"`flat-layout` detected -- analysing {self._root_dir}")
        return self._analyse_flat_packages() or self._analyse_flat_modules()

    def _analyse_flat_packages(self) -> bool:
        """Look for packages under the project root; true when any is found."""
        self.dist.packages = FlatLayoutPackageFinder.find(self._root_dir)
        # Stubs and nested packages do not count as separate top-level packages
        top_level = remove_nested_packages(remove_stubs(self.dist.packages))
        log.debug(f"discovered packages -- {self.dist.packages}")
        self._ensure_no_accidental_inclusion(top_level, "packages")
        return bool(top_level)

    def _analyse_flat_modules(self) -> bool:
        """Look for modules under the project root; true when any is found."""
        self.dist.py_modules = FlatLayoutModuleFinder.find(self._root_dir)
        log.debug(f"discovered py_modules -- {self.dist.py_modules}")
        self._ensure_no_accidental_inclusion(self.dist.py_modules, "modules")
        return bool(self.dist.py_modules)

    def _ensure_no_accidental_inclusion(self, detected: List[str], kind: str):
        """Raise ``PackageDiscoveryError`` when more than one item was detected.

        ``kind`` ("packages" or "modules") is only used in the error message.
        """
        if len(detected) > 1:
            from inspect import cleandoc
            from setuptools.errors import PackageDiscoveryError

            msg = f"""Multiple top-level {kind} discovered in a flat-layout: {detected}.

            To avoid accidental inclusion of unwanted files or directories,
            setuptools will not proceed with this build.

            If you are trying to create a single distribution with multiple {kind}
            on purpose, you should not rely on automatic discovery.
            Instead, consider the following options:

            1. set up custom discovery (`find` directive with `include` or `exclude`)
            2. use a `src-layout`
            3. explicitly set `py_modules` or `packages` with a list of names

            To find more information, look for "package discovery" on setuptools docs.
            """
            raise PackageDiscoveryError(cleandoc(msg))

    def analyse_name(self):
        """The packages/modules are the essential contribution of the author.
        Therefore the name of the distribution can be derived from them.
        """
        if self.dist.metadata.name or self.dist.name:
            # get_name() is not reliable (can return "UNKNOWN")
            return None

        log.debug("No `name` configuration, performing automatic discovery")

        name = (
            self._find_name_single_package_or_module()
            or self._find_name_from_packages()
        )
        if name:
            self.dist.metadata.name = name
            self.dist.name = name

    def _find_name_single_package_or_module(self) -> Optional[str]:
        """Exactly one module or package"""
        for field in ('packages', 'py_modules'):
            items = getattr(self.dist, field, None) or []
            if len(items) == 1:
                log.debug(f"Single module/package detected, name: {items[0]}")
                return items[0]

        return None

    def _find_name_from_packages(self) -> Optional[str]:
        """Try to find the root package that is not a PEP 420 namespace"""
        if not self.dist.packages:
            return None

        packages = remove_stubs(sorted(self.dist.packages, key=len))
        package_dir = self.dist.package_dir or {}

        parent_pkg = find_parent_package(packages, package_dir, self._root_dir)
        if parent_pkg:
            log.debug(f"Common parent package detected, name: {parent_pkg}")
            return parent_pkg

        log.warn("No parent package detected, impossible to derive `name`")
        return None
500
501
def remove_nested_packages(packages: List[str]) -> List[str]:
    """Keep only the packages that are not nested inside another listed package.

    The result is ordered by name length (shortest first).

    >>> remove_nested_packages(["a", "a.b1", "a.b2", "a.b1.c1"])
    ['a']
    >>> remove_nested_packages(["a", "b", "c.d", "c.d.e.f", "g.h", "a.a1"])
    ['a', 'b', 'c.d', 'g.h']
    """
    by_length = sorted(packages, key=len)
    # A package is nested when its name begins with "<other package>."
    parent_prefixes = tuple(f"{pkg}." for pkg in by_length)
    return [pkg for pkg in by_length if not pkg.startswith(parent_prefixes)]
518
519
def remove_stubs(packages: List[str]) -> List[str]:
    """Drop type stub packages (:pep:`561`) from a list of packages.

    A package is dropped when its top-level name ends with ``-stubs``.

    >>> remove_stubs(["a", "a.b", "a-stubs", "a-stubs.b.c", "b", "c-stubs"])
    ['a', 'a.b', 'b']
    """

    def is_stub(pkg: str) -> bool:
        top_level, _dot, _rest = pkg.partition(".")
        return top_level.endswith("-stubs")

    return [pkg for pkg in packages if not is_stub(pkg)]
527
528
def find_parent_package(
    packages: List[str], package_dir: Dict[str, str], root_dir: _Path
) -> Optional[str]:
    """Find the parent package that is not a namespace.

    The candidates are the packages that contain every longer-named package
    in ``packages``. The first candidate whose directory has an
    ``__init__.py`` file is returned; ``None`` when there is no such package.
    """
    by_length = sorted(packages, key=len)
    ancestors = []
    for position, candidate in enumerate(by_length, start=1):
        prefix = f"{candidate}."
        # With the names ordered by length, a candidate is a common ancestor
        # only if every name after it is nested inside it. On the first
        # divergence (e.g. multiple root packages) the search is over, which
        # may leave the list empty.
        if any(not other.startswith(prefix) for other in by_length[position:]):
            break
        ancestors.append(candidate)

    for candidate in ancestors:
        location = find_package_path(candidate, package_dir, root_dir)
        if os.path.isfile(os.path.join(location, "__init__.py")):
            return candidate

    return None
551
552
def find_package_path(name: str, package_dir: Dict[str, str], root_dir: _Path) -> str:
    """Compute the location on disk where the package ``name`` is expected,
    taking the ``package_dir`` option into account.

    The most specific ``package_dir`` entry wins; the ``""`` entry (if any) is
    used as the fallback.

    >>> path = find_package_path("my.pkg", {"": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/my/pkg'

    >>> path = find_package_path("my.pkg", {"my": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested/pkg'

    >>> path = find_package_path("my.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './root/is/nested'

    >>> path = find_package_path("other.pkg", {"my.pkg": "root/is/nested"}, ".")
    >>> path.replace(os.sep, "/")
    './other/pkg'
    """
    parts = name.split(".")
    depth = len(parts)
    # Try the longest dotted prefix first and shorten it one component at a time
    while depth > 0:
        key = ".".join(parts[:depth])
        if key in package_dir:
            return os.path.join(root_dir, package_dir[key], *parts[depth:])
        depth -= 1

    base = package_dir.get("") or ""
    return os.path.join(root_dir, *base.split("/"), *parts)
583
584
def construct_package_dir(packages: List[str], package_path: _Path) -> Dict[str, str]:
    """Build a ``package_dir``-style mapping for the non-nested ``packages``.

    Each one is mapped to a "/"-separated path made of the components of
    ``package_path`` followed by the components of the package name.
    """
    top_level = remove_nested_packages(packages)
    base_parts = tuple(Path(package_path).parts)
    mapping = {}
    for pkg in top_level:
        mapping[pkg] = "/".join(base_parts + tuple(pkg.split(".")))
    return mapping
589