• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Utilities to support packages."""
2
3from collections import namedtuple
4from functools import singledispatch as simplegeneric
5import importlib
6import importlib.util
7import importlib.machinery
8import os
9import os.path
10import re
11import sys
12from types import ModuleType
13import warnings
14
# Public API: the names exported by a star-import of this module.
__all__ = [
    'get_importer', 'iter_importers', 'get_loader', 'find_loader',
    'walk_packages', 'iter_modules', 'get_data',
    'ImpImporter', 'ImpLoader', 'read_code', 'extend_path',
    'ModuleInfo',
]
21
22
# Record type with three fields: the finder, the module name and a
# package flag.
ModuleInfo = namedtuple('ModuleInfo', ['module_finder', 'name', 'ispkg'])
ModuleInfo.__doc__ = 'A namedtuple with minimal info about a module.'
25
26
def _get_spec(finder, name):
    """Return the module spec that *finder* produces for *name*.

    Finders exposing ``find_spec`` are asked directly.  Legacy finders that
    only provide ``find_module`` have the loader they return wrapped in a
    spec; None is returned when such a finder yields no loader.
    """
    try:
        spec_lookup = finder.find_spec
    except AttributeError:
        # Legacy finder: build the spec from whatever loader it hands back.
        legacy_loader = finder.find_module(name)
        if legacy_loader is not None:
            return importlib.util.spec_from_loader(name, legacy_loader)
        return None
    return spec_lookup(name)
39
40
def read_code(stream):
    """Read a code object from an open compiled-bytecode stream.

    Returns None when the first four bytes read from *stream* differ from
    importlib.util.MAGIC_NUMBER.  Otherwise the next 12 header bytes are
    skipped and the unmarshalled remainder is returned.
    """
    # This helper is needed in order for the PEP 302 emulation to
    # correctly handle compiled files
    import marshal

    if stream.read(4) == importlib.util.MAGIC_NUMBER:
        stream.read(12)  # discard the rest of the header
        return marshal.load(stream)
    return None
52
53
def walk_packages(path=None, prefix='', onerror=None):
    """Yield ModuleInfo for every module found recursively on path.

    When path is None, all accessible modules are walked.

    'path' should be either None or a list of paths to look for
    modules in.

    'prefix' is a string placed in front of every module name that is
    produced.

    To discover submodules this function has to import every *package*
    (but NOT every plain module) it encounters, because it needs the
    package's __path__ attribute.

    'onerror' is a callable taking a single argument, the name of the
    package whose import failed.  It is invoked for any exception raised
    while importing a package.  Without an onerror callable, ImportErrors
    are silently skipped and every other exception propagates, which ends
    the walk.

    Examples:

    # list all modules python can access
    walk_packages()

    # list all submodules of ctypes
    walk_packages(ctypes.__path__, ctypes.__name__+'.')
    """
    # Path entries this invocation has already descended into.
    visited = set()

    for info in iter_modules(path, prefix):
        yield info

        if not info.ispkg:
            continue

        try:
            __import__(info.name)
        except ImportError:
            if onerror is not None:
                onerror(info.name)
            continue
        except Exception:
            if onerror is None:
                raise
            onerror(info.name)
            continue

        subpaths = []
        for entry in getattr(sys.modules[info.name], '__path__', None) or []:
            # don't traverse path items we've seen before
            if entry not in visited:
                visited.add(entry)
                subpaths.append(entry)

        yield from walk_packages(subpaths, info.name + '.', onerror)
109
110
def iter_modules(path=None, prefix=''):
    """Yield ModuleInfo for each submodule found on path.

    With path set to None, the finders produced by iter_importers() are
    consulted instead.

    'path' should be either None or a list of paths to look for
    modules in; passing a single string raises ValueError.

    'prefix' is a string placed in front of every module name that is
    produced.

    A given name is reported only once, for the first finder that lists it.
    """
    if isinstance(path, str):
        raise ValueError("path must be None or list of paths to look for "
                        "modules in")
    if path is None:
        importers = iter_importers()
    else:
        importers = map(get_importer, path)

    emitted = set()
    for finder in importers:
        for name, ispkg in iter_importer_modules(finder, prefix):
            if name in emitted:
                continue
            emitted.add(name)
            yield ModuleInfo(finder, name, ispkg)
135
136
@simplegeneric
def iter_importer_modules(importer, prefix=''):
    """Generic entry point for listing the modules an importer provides.

    This default implementation delegates to the importer's own
    ``iter_modules(prefix)`` when it has one and otherwise returns an
    empty list.  Type-specific implementations can be registered on it.
    """
    if hasattr(importer, 'iter_modules'):
        return importer.iter_modules(prefix)
    return []
142
143
# Implement a file walker for the normal importlib path hook
def _iter_file_finder_modules(importer, prefix=''):
    """Yield ``(prefix + name, ispkg)`` for modules in importer.path.

    Nothing is yielded when importer.path is None or is not a directory.
    Entries are visited in sorted order and each name is reported once.  A
    subdirectory is reported as a package only if it directly contains an
    ``__init__`` module.
    """
    if importer.path is None or not os.path.isdir(importer.path):
        return

    seen = set()
    import inspect
    try:
        filenames = sorted(os.listdir(importer.path))
    except OSError:
        # ignore unreadable directories like import does
        filenames = []
    # (sorted so that packages are handled before same-named modules)

    for fn in filenames:
        modname = inspect.getmodulename(fn)
        if modname == '__init__' or modname in seen:
            continue

        ispkg = False
        full = os.path.join(importer.path, fn)

        if not modname and os.path.isdir(full) and '.' not in fn:
            try:
                children = os.listdir(full)
            except OSError:
                # ignore unreadable directories like import does
                children = []
            has_init = any(inspect.getmodulename(child) == '__init__'
                           for child in children)
            if not has_init:
                continue    # not a package
            modname, ispkg = fn, True

        if modname and '.' not in modname:
            seen.add(modname)
            yield prefix + modname, ispkg
184
# Dispatch iter_importer_modules() calls made with a FileFinder to
# _iter_file_finder_modules.
iter_importer_modules.register(
    importlib.machinery.FileFinder, _iter_file_finder_modules)
187
188
def _import_imp():
    """Bind the module-level name ``imp`` to the imported ``imp`` module.

    DeprecationWarning is ignored for the duration of the import.
    """
    global imp
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', DeprecationWarning)
        legacy = importlib.import_module('imp')
    imp = legacy
194
class ImpImporter:
    """PEP 302 Finder that wraps Python's "classic" import algorithm

    ImpImporter(dirname) produces a PEP 302 finder that searches that
    directory.  ImpImporter(None) produces a PEP 302 finder that searches
    the current sys.path, plus any modules that are frozen or built-in.

    Note that ImpImporter does not currently support being used by placement
    on sys.meta_path.
    """

    def __init__(self, path=None):
        # Creating an instance emits a DeprecationWarning and makes sure
        # the module-level ``imp`` name is bound.
        warnings.warn("This emulation is deprecated, use 'importlib' instead",
                      DeprecationWarning)
        _import_imp()
        self.path = path

    def find_module(self, fullname, path=None):
        """Return an ImpLoader for *fullname*, or None if it is not found."""
        # Note: we ignore 'path' argument since it is only used via meta_path
        subname = fullname.rpartition(".")[2]
        if self.path is None:
            # Without a directory only top-level names can be handled.
            if subname != fullname:
                return None
            search = None
        else:
            search = [os.path.realpath(self.path)]
        try:
            file, filename, etc = imp.find_module(subname, search)
        except ImportError:
            return None
        return ImpLoader(fullname, file, filename, etc)

    def iter_modules(self, prefix=''):
        """Yield ``(prefix + name, ispkg)`` for modules in self.path.

        Nothing is yielded when self.path is None or is not a directory.
        Each name is reported once; a subdirectory counts as a package only
        if it directly contains an ``__init__`` module.
        """
        if self.path is None or not os.path.isdir(self.path):
            return

        seen = set()
        import inspect
        try:
            filenames = sorted(os.listdir(self.path))
        except OSError:
            # ignore unreadable directories like import does
            filenames = []
        # (sorted so that packages are handled before same-named modules)

        for fn in filenames:
            modname = inspect.getmodulename(fn)
            if modname == '__init__' or modname in seen:
                continue

            ispkg = False
            full = os.path.join(self.path, fn)

            if not modname and os.path.isdir(full) and '.' not in fn:
                try:
                    children = os.listdir(full)
                except OSError:
                    # ignore unreadable directories like import does
                    children = []
                has_init = any(inspect.getmodulename(child) == '__init__'
                               for child in children)
                if not has_init:
                    continue    # not a package
                modname, ispkg = fn, True

            if modname and '.' not in modname:
                seen.add(modname)
                yield prefix + modname, ispkg
267
268
class ImpLoader:
    """PEP 302 Loader that wraps Python's "classic" import algorithm
    """
    # Class-level defaults; get_code()/get_source() cache their results
    # on the instance under these names.
    code = source = None

    def __init__(self, fullname, file, filename, etc):
        # 'etc' is indexed at [2] by the other methods to obtain the module
        # type that is compared against the imp.* constants.
        warnings.warn("This emulation is deprecated, use 'importlib' instead",
                      DeprecationWarning)
        _import_imp()
        self.fullname = fullname
        self.file = file
        self.filename = filename
        self.etc = etc

    def load_module(self, fullname):
        """Load *fullname* via imp.load_module, closing the file afterwards."""
        self._reopen()
        try:
            # Note: we don't set __loader__ because we want the module to
            # look normal; i.e. this is just a wrapper for standard import
            # machinery
            return imp.load_module(fullname, self.file, self.filename,
                                   self.etc)
        finally:
            if self.file:
                self.file.close()

    def get_data(self, pathname):
        """Return the contents of the file at *pathname* as bytes."""
        with open(pathname, "rb") as stream:
            return stream.read()

    def _reopen(self):
        """Re-open self.file if it exists but has been closed."""
        if not (self.file and self.file.closed):
            return
        kind = self.etc[2]
        if kind == imp.PY_SOURCE:
            self.file = open(self.filename, 'r')
        elif kind in (imp.PY_COMPILED, imp.C_EXTENSION):
            self.file = open(self.filename, 'rb')

    def _fix_name(self, fullname):
        """Default *fullname* to this loader's module name and validate it.

        Raises ImportError when a different module name is given.
        """
        if fullname is None:
            return self.fullname
        if fullname != self.fullname:
            raise ImportError("Loader for module %s cannot handle "
                              "module %s" % (self.fullname, fullname))
        return fullname

    def is_package(self, fullname):
        """Return True if the wrapped module is a package directory."""
        self._fix_name(fullname)
        return self.etc[2] == imp.PKG_DIRECTORY

    def get_code(self, fullname=None):
        """Return the module's code object, computing and caching it once."""
        fullname = self._fix_name(fullname)
        if self.code is not None:
            return self.code
        kind = self.etc[2]
        if kind == imp.PY_SOURCE:
            self.code = compile(self.get_source(fullname), self.filename,
                                'exec')
        elif kind == imp.PY_COMPILED:
            self._reopen()
            try:
                self.code = read_code(self.file)
            finally:
                self.file.close()
        elif kind == imp.PKG_DIRECTORY:
            self.code = self._get_delegate().get_code()
        return self.code

    def get_source(self, fullname=None):
        """Return the module's source text, reading and caching it once."""
        fullname = self._fix_name(fullname)
        if self.source is not None:
            return self.source
        kind = self.etc[2]
        if kind == imp.PY_SOURCE:
            self._reopen()
            try:
                self.source = self.file.read()
            finally:
                self.file.close()
        elif kind == imp.PY_COMPILED:
            # Look for the file named like the compiled one minus its
            # final character.
            source_path = self.filename[:-1]
            if os.path.exists(source_path):
                with open(source_path, 'r') as f:
                    self.source = f.read()
        elif kind == imp.PKG_DIRECTORY:
            self.source = self._get_delegate().get_source()
        return self.source

    def _get_delegate(self):
        """Return the loader for '__init__' inside self.filename."""
        return _get_spec(ImpImporter(self.filename), '__init__').loader

    def get_filename(self, fullname=None):
        """Return the module's file name, or None for other module types."""
        fullname = self._fix_name(fullname)
        kind = self.etc[2]
        if kind == imp.PKG_DIRECTORY:
            return self._get_delegate().get_filename()
        if kind in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION):
            return self.filename
        return None
366
367
try:
    import zipimport
    from zipimport import zipimporter

    def iter_zipimport_modules(importer, prefix=''):
        """Yield ``(prefix + name, ispkg)`` for modules under a zipimporter.

        The sorted entries of zipimport._zip_directory_cache for
        importer.archive are scanned.  Only entries located directly under
        importer.prefix are reported, and each name is reported once.
        """
        entries = sorted(zipimport._zip_directory_cache[importer.archive])
        base = importer.prefix
        base_len = len(base)
        seen = set()
        import inspect
        for entry in entries:
            if not entry.startswith(base):
                continue

            parts = entry[base_len:].split(os.sep)

            # "<name>/__init__.py*" marks <name> as a package.
            if len(parts) == 2 and parts[1].startswith('__init__.py'):
                pkg = parts[0]
                if pkg not in seen:
                    seen.add(pkg)
                    yield prefix + pkg, True

            if len(parts) != 1:
                continue

            modname = inspect.getmodulename(parts[0])
            if modname == '__init__':
                continue

            if modname and '.' not in modname and modname not in seen:
                seen.add(modname)
                yield prefix + modname, False

    iter_importer_modules.register(zipimporter, iter_zipimport_modules)

except ImportError:
    pass
404
405
def get_importer(path_item):
    """Retrieve a finder for the given path item

    An entry already present in sys.path_importer_cache is returned as is.
    Otherwise each hook in sys.path_hooks is tried in order; the first one
    that does not raise ImportError supplies the finder, which is then
    stored in the cache.  None is returned when no hook accepts the item.

    The cache (or part of it) can be cleared manually if a
    rescan of sys.path_hooks is necessary.
    """
    try:
        return sys.path_importer_cache[path_item]
    except KeyError:
        for hook in sys.path_hooks:
            try:
                found = hook(path_item)
            except ImportError:
                continue
            sys.path_importer_cache.setdefault(path_item, found)
            return found
        return None
428
429
def iter_importers(fullname=""):
    """Yield finders for the given module name

    For a dotted fullname, the finders are those for the __path__ of the
    package containing it; that package is imported as a side effect, and
    nothing is yielded if it has no __path__.

    For an undotted (or empty) fullname, all top level finders are
    produced: everything on sys.meta_path followed by a finder for each
    sys.path entry.

    Relative names (starting with '.') raise ImportError.
    """
    if fullname.startswith('.'):
        msg = "Relative module name {!r} not supported".format(fullname)
        raise ImportError(msg)
    if '.' not in fullname:
        yield from sys.meta_path
        search = sys.path
    else:
        # Get the containing package's __path__
        parent = fullname.rpartition(".")[0]
        package = importlib.import_module(parent)
        search = getattr(package, '__path__', None)
        if search is None:
            return
    for entry in search:
        yield get_importer(entry)
457
458
def get_loader(module_or_name):
    """Get a "loader" object for module_or_name

    Accepts either a module object or a module name.  A name found in
    sys.modules is first replaced by the module stored there (None is
    returned if that entry is None).  For a module object, its __loader__
    is returned when set; if it has neither a loader nor a __spec__ the
    result is None.  Everything else is resolved through find_loader().
    """
    if module_or_name in sys.modules:
        module_or_name = sys.modules[module_or_name]
        if module_or_name is None:
            return None

    if not isinstance(module_or_name, ModuleType):
        return find_loader(module_or_name)

    module = module_or_name
    loader = getattr(module, '__loader__', None)
    if loader is not None:
        return loader
    if getattr(module, '__spec__', None) is None:
        return None
    return find_loader(module.__name__)
481
482
def find_loader(fullname):
    """Find a "loader" object for fullname

    This is a backwards compatibility wrapper around
    importlib.util.find_spec.  It re-raises ImportError, AttributeError,
    TypeError and ValueError from that call as ImportError, and returns
    only the spec's loader (or None when no spec is found).  Relative
    names raise ImportError.
    """
    if fullname.startswith('.'):
        raise ImportError(
            "Relative module name {!r} not supported".format(fullname))
    try:
        spec = importlib.util.find_spec(fullname)
    except (ImportError, AttributeError, TypeError, ValueError) as ex:
        # This hack fixes an impedance mismatch between pkgutil and
        # importlib, where the latter raises other errors for cases where
        # pkgutil previously raised ImportError
        template = "Error while finding loader for {!r} ({}: {})"
        raise ImportError(template.format(fullname, type(ex), ex)) from ex
    if spec is None:
        return None
    return spec.loader
502
503
def extend_path(path, name):
    """Extend a package's path.

    Intended use is to place the following code in a package's __init__.py:

        from pkgutil import extend_path
        __path__ = extend_path(__path__, __name__)

    For a top-level package every string entry of sys.path is searched;
    for a subpackage the parent package's __path__ is searched instead.
    The finder for each entry is asked for the package, and any locations
    it reports that are not already present are appended.  This is useful
    if one wants to distribute different parts of a single logical
    package as multiple directories.

    Each searched directory is also checked for a file called
    "<name>.pkg", where <name> is the full name argument.  This feature is
    similar to *.pth files (see site.py), except that lines starting with
    'import' are not special-cased.  A *.pkg file is trusted at face
    value: every line that is neither empty nor starts with '#' is
    appended as is, regardless of whether it exists on the filesystem.
    (This is a feature.)

    If the input path is not a list (as is the case for frozen
    packages) it is returned unchanged.  The input path is not
    modified; an extended copy is returned.  Items are only appended
    to the copy at the end.

    It is assumed that sys.path is a sequence.  Items of the searched
    path that are not strings are ignored.
    """
    if not isinstance(path, list):
        # This could happen e.g. when this is called from inside a
        # frozen package.  Return the path unchanged in that case.
        return path

    pkg_filename = name + ".pkg"

    extended = path[:]  # work on a copy of the existing path

    parent_package, _, final_name = name.rpartition('.')
    if not parent_package:
        search_path = sys.path
    else:
        try:
            search_path = sys.modules[parent_package].__path__
        except (KeyError, AttributeError):
            # The parent is not imported or is not a package, so there
            # is nothing to search.
            return extended

    for entry in search_path:
        if not isinstance(entry, str):
            continue

        finder = get_importer(entry)
        if finder is not None:
            portions = []
            if hasattr(finder, 'find_spec'):
                spec = finder.find_spec(final_name)
                if spec is not None:
                    portions = spec.submodule_search_locations or []
            # Is this finder PEP 420 compliant?
            elif hasattr(finder, 'find_loader'):
                _, portions = finder.find_loader(final_name)

            for portion in portions:
                # XXX This may still add duplicate entries to path on
                # case-insensitive filesystems
                if portion not in extended:
                    extended.append(portion)

        # XXX Is this the right thing for subpackages like zope.app?
        # It looks for a file named "zope.app.pkg"
        pkgfile = os.path.join(entry, pkg_filename)
        if not os.path.isfile(pkgfile):
            continue
        try:
            f = open(pkgfile)
        except OSError as msg:
            sys.stderr.write("Can't open %s: %s\n" %
                             (pkgfile, msg))
            continue
        with f:
            for line in f:
                line = line.rstrip('\n')
                if line and not line.startswith('#'):
                    extended.append(line)  # Don't check for existence!

    return extended
596
597
def get_data(package, resource):
    """Get a resource from a package.

    This is a wrapper round the PEP 302 loader get_data API. The package
    argument should be the name of a package, in standard module format
    (foo.bar). The resource argument should be in the form of a relative
    filename, using '/' as the path separator. The parent directory name '..'
    is not allowed, and nor is a rooted name (starting with a '/'); note
    that this function does not itself check for either.

    The function returns a binary string, which is the contents of the
    specified resource.

    For packages located in the filesystem, which have already been imported,
    this is the rough equivalent of

        d = os.path.dirname(sys.modules[package].__file__)
        data = open(os.path.join(d, resource), 'rb').read()

    If the package cannot be located or loaded, or it uses a PEP 302 loader
    which does not support get_data(), or the loaded module has no usable
    __file__ (missing or None), then None is returned.
    """

    spec = importlib.util.find_spec(package)
    if spec is None:
        return None
    loader = spec.loader
    if loader is None or not hasattr(loader, 'get_data'):
        return None
    # XXX needs test
    mod = (sys.modules.get(package) or
           importlib._bootstrap._load(spec))
    # A module may carry __file__ = None rather than lacking the attribute;
    # there is no directory to anchor the resource to in either case, so
    # treat both the same instead of passing None to os.path.dirname().
    package_file = getattr(mod, '__file__', None)
    if package_file is None:
        return None

    # Modify the resource name to be compatible with the loader.get_data
    # signature - an os.path format "filename" starting with the dirname of
    # the package's __file__
    parts = resource.split('/')
    parts.insert(0, os.path.dirname(package_file))
    resource_name = os.path.join(*parts)
    return loader.get_data(resource_name)
639
640
# A dotted sequence of identifier-like words, none starting with a digit.
_DOTTED_WORDS = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
# Groups: 'pkg' (left of the colon), 'cln' (the colon and what follows it,
# if present) and 'obj' (right of the colon, if present).
_NAME_PATTERN = re.compile(f'^(?P<pkg>{_DOTTED_WORDS})(?P<cln>:(?P<obj>{_DOTTED_WORDS})?)?$', re.U)
del _DOTTED_WORDS

def resolve_name(name):
    """
    Resolve a name to an object.

    `name` is expected to be a string in one of the two formats below,
    where W stands for a valid Python identifier and the dot for a literal
    period:

    W(.W)*
    W(.W)*:(W(.W)*)?

    The first form exists for backward compatibility only.  Some leading
    part of the dotted name is a package and the remainder is an object
    inside it, possibly nested within other objects.  As the boundary
    cannot be told from the string alone, successively longer prefixes are
    imported until one fails to import.

    In the second form the colon marks the boundary explicitly: the dotted
    name on its left is imported in one step and the dotted name on its
    right is looked up attribute by attribute inside that module.  If
    nothing follows the colon, the module itself is returned.

    The function returns an object (which might be a module), or raises
    one of the following exceptions:

    ValueError - if `name` isn't in a recognised format
    ImportError - if an import failed when it shouldn't have
    AttributeError - if a failure occurred when traversing the object
                     hierarchy within the imported package to get to the
                     desired object
    """
    match = _NAME_PATTERN.match(name)
    if match is None:
        raise ValueError(f'invalid format: {name!r}')

    if match.group('cln'):
        # there is a colon - a one-step import is all that's needed
        module = importlib.import_module(match.group('pkg'))
        obj_path = match.group('obj')
        attrs = obj_path.split('.') if obj_path else []
    else:
        # no colon - have to iterate to find the package boundary
        modname, *attrs = name.split('.')
        # first part *must* be a module/package.
        module = importlib.import_module(modname)
        while attrs:
            candidate = f'{modname}.{attrs[0]}'
            try:
                module = importlib.import_module(candidate)
            except ImportError:
                break
            del attrs[0]
            modname = candidate

    # At this point `module` is imported and `attrs` holds the attribute
    # names still to be traversed (empty if only the module is wanted).
    result = module
    for attr in attrs:
        result = getattr(result, attr)
    return result
708