• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Utilities to support packages."""
2
3from collections import namedtuple
4from functools import singledispatch as simplegeneric
5import importlib
6import importlib.util
7import importlib.machinery
8import os
9import os.path
10import sys
11from types import ModuleType
12import warnings
13
# Public names of this module (what a star-import exposes).
__all__ = [
    'get_importer',
    'iter_importers',
    'get_loader',
    'find_loader',
    'walk_packages',
    'iter_modules',
    'get_data',
    'ImpImporter',
    'ImpLoader',
    'read_code',
    'extend_path',
    'ModuleInfo',
]
20
21
# Record type yielded by iter_modules() and walk_packages():
# (finder that located the module, module name, is-it-a-package flag).
ModuleInfo = namedtuple('ModuleInfo', ['module_finder', 'name', 'ispkg'])
ModuleInfo.__doc__ = 'A namedtuple with minimal info about a module.'
24
25
def _get_spec(finder, name):
    """Return the module spec that *finder* produces for *name*.

    Finders exposing ``find_spec`` are asked directly.  Legacy finders
    (no ``find_spec`` attribute) are queried through ``find_module`` and
    the resulting loader is wrapped in a spec; ``None`` is returned when
    such a finder does not know the module.
    """
    try:
        find_spec = finder.find_spec
    except AttributeError:
        # Legacy finder: fall through to the find_module() protocol.
        pass
    else:
        return find_spec(name)

    legacy_loader = finder.find_module(name)
    if legacy_loader is None:
        return None
    return importlib.util.spec_from_loader(name, legacy_loader)
38
39
def read_code(stream):
    """Read a marshalled code object from a compiled-module stream.

    *stream* must be a binary file-like object positioned at the start of
    the compiled file.  Returns ``None`` when the first four bytes do not
    equal ``importlib.util.MAGIC_NUMBER``; otherwise the next 12 header
    bytes are discarded and the unmarshalled payload is returned.
    """
    # This helper is needed in order for the PEP 302 emulation to
    # correctly handle compiled files
    import marshal

    if stream.read(4) != importlib.util.MAGIC_NUMBER:
        return None

    # Skip the remainder of the header before the marshalled data.
    stream.read(12)
    return marshal.load(stream)
51
52
def walk_packages(path=None, prefix='', onerror=None):
    """Yield ModuleInfo for every module found recursively on path.

    If 'path' is None, all accessible modules are walked; otherwise it
    should be a list of paths to look for modules in.

    'prefix' is a string prepended to every module name that is yielded.

    Every *package* (but not every module) encountered is imported, since
    its __path__ attribute is needed to locate its submodules.

    'onerror', when given, is called with the name of the package whose
    import raised an exception.  Without an onerror function,
    ImportErrors are swallowed and any other exception propagates,
    ending the walk.

    Examples:

    # list all modules python can access
    walk_packages()

    # list all submodules of ctypes
    walk_packages(ctypes.__path__, ctypes.__name__+'.')
    """
    # Path entries already descended into by this invocation.
    visited = set()

    for info in iter_modules(path, prefix):
        yield info

        if not info.ispkg:
            continue

        try:
            __import__(info.name)
        except Exception as exc:
            if onerror is not None:
                onerror(info.name)
            elif not isinstance(exc, ImportError):
                raise
            # The package could not be imported, so it cannot be descended.
            continue

        subpaths = getattr(sys.modules[info.name], '__path__', None) or []

        # don't traverse path items we've seen before
        fresh = []
        for entry in subpaths:
            if entry not in visited:
                visited.add(entry)
                fresh.append(entry)

        yield from walk_packages(fresh, info.name + '.', onerror)
108
109
def iter_modules(path=None, prefix=''):
    """Yield ModuleInfo for each submodule found on path.

    With path=None the finders produced by iter_importers() are used,
    i.e. all top-level modules on sys.path are listed.  Otherwise 'path'
    must be a list of paths to look for modules in; passing a single
    string raises ValueError.

    'prefix' is a string prepended to every module name that is yielded.
    A name is yielded only once, for the first finder that reports it.
    """
    if isinstance(path, str):
        raise ValueError("path must be None or list of paths to look for "
                        "modules in")

    if path is None:
        finders = iter_importers()
    else:
        finders = map(get_importer, path)

    emitted = set()
    for finder in finders:
        for name, ispkg in iter_importer_modules(finder, prefix):
            if name in emitted:
                continue
            emitted.add(name)
            yield ModuleInfo(finder, name, ispkg)
134
135
@simplegeneric
def iter_importer_modules(importer, prefix=''):
    """List the modules an importer provides, as (name, ispkg) pairs.

    Generic fallback: delegate to the importer's own ``iter_modules``
    method when it has one, otherwise report nothing.  Specialised
    implementations can be attached with ``.register()``.
    """
    if hasattr(importer, 'iter_modules'):
        return importer.iter_modules(prefix)
    return []
141
142
# Implement a file walker for the normal importlib path hook
def _iter_file_finder_modules(importer, prefix=''):
    """Yield (prefix + name, ispkg) for modules in importer.path.

    Nothing is yielded when importer.path is None or not a directory.
    A sub-directory counts as a package only if it directly contains an
    ``__init__`` module; entries whose name contains a dot are skipped.
    """
    root = importer.path
    if root is None or not os.path.isdir(root):
        return

    import inspect

    try:
        entries = os.listdir(root)
    except OSError:
        # ignore unreadable directories like import does
        entries = []
    entries.sort()  # handle packages before same-named modules

    seen = set()
    for entry in entries:
        modname = inspect.getmodulename(entry)
        if modname == '__init__' or modname in seen:
            continue

        ispkg = False
        if not modname:
            candidate = os.path.join(root, entry)
            if os.path.isdir(candidate) and '.' not in entry:
                modname = entry
                try:
                    children = os.listdir(candidate)
                except OSError:
                    # ignore unreadable directories like import does
                    children = []
                ispkg = any(inspect.getmodulename(child) == '__init__'
                            for child in children)
                if not ispkg:
                    continue    # not a package

        if modname and '.' not in modname:
            seen.add(modname)
            yield prefix + modname, ispkg
183
# Use the directory walker above whenever the importer is a FileFinder.
_file_finder_type = importlib.machinery.FileFinder
iter_importer_modules.register(_file_finder_type, _iter_file_finder_modules)
del _file_finder_type
186
187
def _import_imp():
    """Bind the module-level name ``imp`` to the imported 'imp' module.

    The import is performed with DeprecationWarning filtered out, so any
    such warning emitted while importing 'imp' is not shown.
    """
    global imp
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', category=DeprecationWarning)
        imp = importlib.import_module('imp')
193
class ImpImporter:
    """PEP 302 Finder that wraps Python's "classic" import algorithm

    ImpImporter(dirname) produces a PEP 302 finder that searches that
    directory.  ImpImporter(None) produces a PEP 302 finder that searches
    the current sys.path, plus any modules that are frozen or built-in.

    Note that ImpImporter does not currently support being used by placement
    on sys.meta_path.
    """

    def __init__(self, path=None):
        """Emit a DeprecationWarning, load 'imp', and remember *path*."""
        warnings.warn("This emulation is deprecated and slated for removal "
                      "in Python 3.12; use 'importlib' instead",
                      DeprecationWarning)
        _import_imp()
        self.path = path

    def find_module(self, fullname, path=None):
        """Return an ImpLoader for *fullname*, or None if it is not found.

        Only the last dotted component of *fullname* is searched for.  A
        finder created without a path refuses dotted names.
        """
        # Note: we ignore 'path' argument since it is only used via meta_path
        subname = fullname.rpartition(".")[2]
        if self.path is None:
            if subname != fullname:
                return None
            path = None
        else:
            path = [os.path.realpath(self.path)]

        try:
            file, filename, etc = imp.find_module(subname, path)
        except ImportError:
            return None
        return ImpLoader(fullname, file, filename, etc)

    def iter_modules(self, prefix=''):
        """Yield (prefix + name, ispkg) for modules in self.path.

        Nothing is yielded when self.path is None or not a directory.  A
        sub-directory counts as a package only if it directly contains an
        ``__init__`` module; entries with a dot in the name are skipped.
        """
        root = self.path
        if root is None or not os.path.isdir(root):
            return

        import inspect

        try:
            entries = os.listdir(root)
        except OSError:
            # ignore unreadable directories like import does
            entries = []
        entries.sort()  # handle packages before same-named modules

        seen = set()
        for entry in entries:
            modname = inspect.getmodulename(entry)
            if modname == '__init__' or modname in seen:
                continue

            ispkg = False
            if not modname:
                candidate = os.path.join(root, entry)
                if os.path.isdir(candidate) and '.' not in entry:
                    modname = entry
                    try:
                        children = os.listdir(candidate)
                    except OSError:
                        # ignore unreadable directories like import does
                        children = []
                    ispkg = any(inspect.getmodulename(child) == '__init__'
                                for child in children)
                    if not ispkg:
                        continue    # not a package

            if modname and '.' not in modname:
                seen.add(modname)
                yield prefix + modname, ispkg
267
268
class ImpLoader:
    """PEP 302 Loader that wraps Python's "classic" import algorithm
    """
    # Lazily filled caches for get_code() / get_source().
    code = None
    source = None

    def __init__(self, fullname, file, filename, etc):
        """Store the pieces of an ``imp.find_module()`` result.

        *etc* is indexed at position 2 throughout this class to obtain
        the module type constant.
        """
        warnings.warn("This emulation is deprecated and slated for removal in "
                      "Python 3.12; use 'importlib' instead",
                      DeprecationWarning)
        _import_imp()
        self.file = file
        self.filename = filename
        self.fullname = fullname
        self.etc = etc

    def load_module(self, fullname):
        """Load and return the module, closing the file afterwards."""
        self._reopen()
        try:
            module = imp.load_module(fullname, self.file, self.filename,
                                     self.etc)
        finally:
            if self.file:
                self.file.close()
        # Note: we don't set __loader__ because we want the module to look
        # normal; i.e. this is just a wrapper for standard import machinery
        return module

    def get_data(self, pathname):
        """Return the raw bytes stored in the file *pathname*."""
        with open(pathname, "rb") as stream:
            data = stream.read()
        return data

    def _reopen(self):
        """Re-open self.file if it exists but has been closed.

        Source files are opened in text mode, compiled files and C
        extensions in binary mode; other module types are left alone.
        """
        if not (self.file and self.file.closed):
            return
        mod_type = self.etc[2]
        if mod_type == imp.PY_SOURCE:
            mode = 'r'
        elif mod_type in (imp.PY_COMPILED, imp.C_EXTENSION):
            mode = 'rb'
        else:
            return
        self.file = open(self.filename, mode)

    def _fix_name(self, fullname):
        """Return the module name to operate on.

        None stands for this loader's own module; any name other than
        self.fullname raises ImportError.
        """
        if fullname is None:
            return self.fullname
        if fullname != self.fullname:
            raise ImportError("Loader for module %s cannot handle "
                              "module %s" % (self.fullname, fullname))
        return fullname

    def is_package(self, fullname):
        """Return True if the module type is imp.PKG_DIRECTORY."""
        fullname = self._fix_name(fullname)
        return self.etc[2] == imp.PKG_DIRECTORY

    def get_code(self, fullname=None):
        """Return the module's code object, computing and caching it once.

        The cache stays None for module types other than source,
        compiled and package directory.
        """
        fullname = self._fix_name(fullname)
        if self.code is not None:
            return self.code

        mod_type = self.etc[2]
        if mod_type == imp.PY_SOURCE:
            source = self.get_source(fullname)
            self.code = compile(source, self.filename, 'exec')
        elif mod_type == imp.PY_COMPILED:
            self._reopen()
            try:
                self.code = read_code(self.file)
            finally:
                self.file.close()
        elif mod_type == imp.PKG_DIRECTORY:
            self.code = self._get_delegate().get_code()
        return self.code

    def get_source(self, fullname=None):
        """Return the module's source text, reading and caching it once.

        For a compiled module the source is looked up at the file name
        with its last character removed, and only if that path exists.
        """
        fullname = self._fix_name(fullname)
        if self.source is not None:
            return self.source

        mod_type = self.etc[2]
        if mod_type == imp.PY_SOURCE:
            self._reopen()
            try:
                self.source = self.file.read()
            finally:
                self.file.close()
        elif mod_type == imp.PY_COMPILED:
            source_path = self.filename[:-1]
            if os.path.exists(source_path):
                with open(source_path, 'r') as f:
                    self.source = f.read()
        elif mod_type == imp.PKG_DIRECTORY:
            self.source = self._get_delegate().get_source()
        return self.source

    def _get_delegate(self):
        """Return the loader for '__init__' inside the package directory."""
        finder = ImpImporter(self.filename)
        return _get_spec(finder, '__init__').loader

    def get_filename(self, fullname=None):
        """Return the file backing the module, or None if there is none."""
        fullname = self._fix_name(fullname)
        mod_type = self.etc[2]
        if mod_type == imp.PKG_DIRECTORY:
            return self._get_delegate().get_filename()
        if mod_type in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION):
            return self.filename
        return None
367
368
try:
    import zipimport
    from zipimport import zipimporter

    def iter_zipimport_modules(importer, prefix=''):
        """Yield (prefix + name, ispkg) for modules inside a zip archive.

        Only archive entries located directly under importer.prefix are
        considered: ``<name>/__init__.py*`` entries are reported as
        packages and single-component entries as plain modules.
        """
        archive_entries = sorted(
            zipimport._zip_directory_cache[importer.archive])
        base = importer.prefix
        base_len = len(base)
        seen = set()
        import inspect

        for entry in archive_entries:
            if not entry.startswith(base):
                continue

            parts = entry[base_len:].split(os.sep)

            if len(parts) == 2:
                pkg_name, member = parts
                if member.startswith('__init__.py') and pkg_name not in seen:
                    seen.add(pkg_name)
                    yield prefix + pkg_name, True
                continue

            if len(parts) != 1:
                continue

            modname = inspect.getmodulename(parts[0])
            if modname == '__init__':
                continue

            if modname and '.' not in modname and modname not in seen:
                seen.add(modname)
                yield prefix + modname, False

    iter_importer_modules.register(zipimporter, iter_zipimport_modules)

except ImportError:
    # zipimport is unavailable; no zip support is registered.
    pass
405
406
def get_importer(path_item):
    """Retrieve a finder for the given path item

    'path_item' may be a str, bytes or os.PathLike object; it is
    normalised to str with os.fsdecode() before use, so every spelling
    of the same path shares one cache entry and path hooks always
    receive a str.

    The returned finder is cached in sys.path_importer_cache
    if it was newly created by a path hook.  None is returned (and
    nothing is cached) when every hook in sys.path_hooks rejects the
    item by raising ImportError.

    The cache (or part of it) can be cleared manually if a
    rescan of sys.path_hooks is necessary.
    """
    # Without this, a bytes/PathLike item misses cache entries keyed by
    # str and may be handed unchanged to hooks that expect str paths.
    path_item = os.fsdecode(path_item)

    try:
        return sys.path_importer_cache[path_item]
    except KeyError:
        pass

    for path_hook in sys.path_hooks:
        try:
            importer = path_hook(path_item)
        except ImportError:
            # This hook does not handle the item; try the next one.
            continue
        # setdefault: keep an entry another caller may have stored meanwhile.
        sys.path_importer_cache.setdefault(path_item, importer)
        return importer
    return None
429
430
def iter_importers(fullname=""):
    """Yield finders for the given module name

    For a dotted 'fullname' the finders are those for the __path__ of
    the containing package, which is imported as a side effect; nothing
    is yielded if that module has no __path__.  Otherwise (including
    when no name is given) every entry of sys.meta_path is yielded,
    followed by the finder obtained for each item of sys.path.

    Names starting with '.' raise ImportError.
    """
    if fullname.startswith('.'):
        msg = "Relative module name {!r} not supported".format(fullname)
        raise ImportError(msg)

    pkg_name, dot, _ = fullname.rpartition(".")
    if dot:
        # Get the containing package's __path__
        pkg = importlib.import_module(pkg_name)
        search_path = getattr(pkg, '__path__', None)
        if search_path is None:
            return
    else:
        yield from sys.meta_path
        search_path = sys.path

    for item in search_path:
        yield get_importer(item)
458
459
def get_loader(module_or_name):
    """Get a "loader" object for module_or_name

    Returns None if the module cannot be found or imported.
    If the named module is not already imported, its containing package
    (if any) is imported, in order to establish the package __path__.
    """
    if module_or_name in sys.modules:
        module_or_name = sys.modules[module_or_name]
        if module_or_name is None:
            # A None entry in sys.modules: there is nothing to load.
            return None

    if not isinstance(module_or_name, ModuleType):
        return find_loader(module_or_name)

    module = module_or_name
    loader = getattr(module, '__loader__', None)
    if loader is not None:
        return loader
    if getattr(module, '__spec__', None) is None:
        return None
    return find_loader(module.__name__)
482
483
def find_loader(fullname):
    """Find a "loader" object for fullname

    This is a backwards compatibility wrapper around
    importlib.util.find_spec that converts most failures to ImportError
    and only returns the loader rather than the full spec.  None is
    returned when no spec is found; names starting with '.' raise
    ImportError.
    """
    if fullname.startswith('.'):
        msg = "Relative module name {!r} not supported".format(fullname)
        raise ImportError(msg)

    try:
        spec = importlib.util.find_spec(fullname)
    except (ImportError, AttributeError, TypeError, ValueError) as ex:
        # This hack fixes an impedance mismatch between pkgutil and
        # importlib, where the latter raises other errors for cases where
        # pkgutil previously raised ImportError
        msg = "Error while finding loader for {!r} ({}: {})"
        raise ImportError(msg.format(fullname, type(ex), ex)) from ex

    if spec is None:
        return None
    return spec.loader
503
504
def extend_path(path, name):
    """Extend a package's path.

    Intended use is to place the following code in a package's __init__.py:

        from pkgutil import extend_path
        __path__ = extend_path(__path__, __name__)

    This will add to the package's __path__ all subdirectories of
    directories on sys.path named after the package.  This is useful
    if one wants to distribute different parts of a single logical
    package as multiple directories.

    It also looks for *.pkg files, where * matches the name argument.
    This feature is similar to *.pth files (see site.py), except that
    it doesn't special-case lines starting with 'import'.  A *.pkg file
    is trusted at face value: every non-blank line that does not start
    with '#' is added to the path, regardless of whether it exists on
    the filesystem.  (This is a feature.)

    If the input path is not a list (as is the case for frozen
    packages) it is returned unchanged.  The input path is not
    modified; an extended copy is returned.  Items are only appended
    to the copy at the end.

    It is assumed that sys.path is a sequence.  Items of the search
    path that are not strings are ignored.  Unicode items of sys.path
    that cause errors when used as filenames may cause this function to
    raise an exception (in line with os.path.isdir() behavior).
    """

    if not isinstance(path, list):
        # This could happen e.g. when this is called from inside a
        # frozen package.  Return the path unchanged in that case.
        return path

    pkg_filename = name + ".pkg"

    extended = path[:]  # Start with a copy of the existing path

    parent_package, _, final_name = name.rpartition('.')
    if parent_package:
        try:
            search_path = sys.modules[parent_package].__path__
        except (KeyError, AttributeError):
            # The parent is not imported or is not a package, so there
            # is nowhere to search.
            return extended
    else:
        search_path = sys.path

    for entry in search_path:
        if not isinstance(entry, str):
            continue

        finder = get_importer(entry)
        if finder is not None:
            if hasattr(finder, 'find_spec'):
                spec = finder.find_spec(final_name)
                if spec is None:
                    portions = []
                else:
                    portions = spec.submodule_search_locations or []
            # Is this finder PEP 420 compliant?
            elif hasattr(finder, 'find_loader'):
                _, portions = finder.find_loader(final_name)
            else:
                portions = []

            for portion in portions:
                # XXX This may still add duplicate entries to path on
                # case-insensitive filesystems
                if portion not in extended:
                    extended.append(portion)

        # XXX Is this the right thing for subpackages like zope.app?
        # It looks for a file named "zope.app.pkg"
        pkgfile = os.path.join(entry, pkg_filename)
        if not os.path.isfile(pkgfile):
            continue

        try:
            f = open(pkgfile)
        except OSError as msg:
            sys.stderr.write("Can't open %s: %s\n" %
                             (pkgfile, msg))
            continue

        with f:
            for line in f:
                line = line.rstrip('\n')
                if not line or line.startswith('#'):
                    continue
                extended.append(line)  # Don't check for existence!

    return extended
597
598
def get_data(package, resource):
    """Get a resource from a package.

    This is a wrapper round the PEP 302 loader get_data API. The package
    argument should be the name of a package, in standard module format
    (foo.bar). The resource argument should be in the form of a relative
    filename, using '/' as the path separator. The parent directory name '..'
    is not allowed, and nor is a rooted name (starting with a '/').

    The function returns a binary string, which is the contents of the
    specified resource.

    For packages located in the filesystem, which have already been imported,
    this is the rough equivalent of

        d = os.path.dirname(sys.modules[package].__file__)
        data = open(os.path.join(d, resource), 'rb').read()

    If the package cannot be located or loaded, or it uses a PEP 302 loader
    which does not support get_data(), then None is returned.
    """

    spec = importlib.util.find_spec(package)
    if spec is None:
        return None

    loader = spec.loader
    if loader is None or not hasattr(loader, 'get_data'):
        return None

    # XXX needs test
    # Reuse the already-imported module if there is one, else load it.
    mod = sys.modules.get(package) or importlib._bootstrap._load(spec)
    if mod is None or not hasattr(mod, '__file__'):
        return None

    # Modify the resource name to be compatible with the loader.get_data
    # signature - an os.path format "filename" starting with the dirname of
    # the package's __file__
    package_dir = os.path.dirname(mod.__file__)
    resource_name = os.path.join(package_dir, *resource.split('/'))
    return loader.get_data(resource_name)
640
641
# Compiled lazily by resolve_name() on first use.
_NAME_PATTERN = None

def resolve_name(name):
    """
    Resolve a name to an object.

    It is expected that `name` will be a string in one of the following
    formats, where W is shorthand for a valid Python identifier and dot stands
    for a literal period in these pseudo-regexes:

    W(.W)*
    W(.W)*:(W(.W)*)?

    The first form is intended for backward compatibility only. It assumes that
    some part of the dotted name is a package, and the rest is an object
    somewhere within that package, possibly nested inside other objects.
    Because the place where the package stops and the object hierarchy starts
    can't be inferred by inspection, repeated attempts to import must be done
    with this form.

    In the second form, the caller makes the division point clear through the
    provision of a single colon: the dotted name to the left of the colon is a
    package to be imported, and the dotted name to the right is the object
    hierarchy within that package. Only one import is needed in this form. If
    it ends with the colon, then a module object is returned.

    The function will return an object (which might be a module), or raise one
    of the following exceptions:

    ValueError - if `name` isn't in a recognised format
    ImportError - if an import failed when it shouldn't have
    AttributeError - if a failure occurred when traversing the object hierarchy
                     within the imported package to get to the desired object.
    """
    global _NAME_PATTERN
    if _NAME_PATTERN is None:
        # Lazy import to speedup Python startup time
        import re
        dotted_words = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
        _NAME_PATTERN = re.compile(f'^(?P<pkg>{dotted_words})'
                                   f'(?P<cln>:(?P<obj>{dotted_words})?)?$',
                                   re.UNICODE)

    match = _NAME_PATTERN.match(name)
    if not match:
        raise ValueError(f'invalid format: {name!r}')

    if match.group('cln'):
        # there is a colon - a one-step import is all that's needed
        mod = importlib.import_module(match.group('pkg'))
        object_path = match.group('obj')
        parts = object_path.split('.') if object_path else []
    else:
        # no colon - have to iterate to find the package boundary
        parts = name.split('.')
        modname = parts.pop(0)
        # first part *must* be a module/package.
        mod = importlib.import_module(modname)
        while parts:
            candidate = f'{modname}.{parts[0]}'
            try:
                mod = importlib.import_module(candidate)
            except ImportError:
                # The remaining parts are attributes, not modules.
                break
            parts.pop(0)
            modname = candidate

    # if we reach this point, mod is the module, already imported, and
    # parts is the list of parts in the object hierarchy to be traversed, or
    # an empty list if just the module is wanted.
    result = mod
    for attr in parts:
        result = getattr(result, attr)
    return result
716