• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Utilities to support packages."""
2
3import os
4import sys
5import imp
6import os.path
7from types import ModuleType
8
# Names exported by ``from pkgutil import *``.
__all__ = [
    'get_importer',
    'iter_importers',
    'get_loader',
    'find_loader',
    'walk_packages',
    'iter_modules',
    'get_data',
    'ImpImporter',
    'ImpLoader',
    'read_code',
    'extend_path',
]
14
def read_code(stream):
    """Load a code object from an open compiled-bytecode stream.

    The first four bytes of *stream* are compared with ``imp.get_magic()``.
    On a mismatch ``None`` is returned and nothing more is read.  Otherwise
    the following four bytes (the timestamp field) are discarded and the
    rest of the stream is unmarshalled and returned.

    This helper lets the PEP 302 emulation handle compiled files.
    """
    import marshal

    if stream.read(4) != imp.get_magic():
        return None

    stream.read(4)  # timestamp field: consumed but not checked
    return marshal.load(stream)
26
27
def simplegeneric(func):
    """Turn *func* into a minimal single-dispatch generic function.

    The returned wrapper dispatches on the class of its first positional
    argument: the classes in that argument's MRO are tried in order and
    the first one with a registered implementation handles the call.
    When none is registered, *func* itself is called.

    Implementations are added with ``wrapper.register(typ, impl)``, or with
    ``@wrapper.register(typ)`` used as a decorator.
    """
    registry = {}

    def register(typ, func=None):
        # With no implementation given, act as a decorator factory.
        if func is None:
            return lambda f: register(typ, f)
        registry[typ] = func
        return func

    def lookup_order(ob):
        # Return the sequence of classes to consult for 'ob'.
        try:
            klass = ob.__class__
        except AttributeError:
            klass = type(ob)
        try:
            return klass.__mro__
        except AttributeError:
            pass
        # No __mro__ on the class: derive a throwaway subclass that also
        # inherits from object and borrow its MRO, minus the subclass.
        try:
            class bridge(klass, object):
                pass
        except TypeError:
            return (object,)   # must be an ExtensionClass or some such  :(
        return bridge.__mro__[1:]

    def wrapper(*args, **kw):
        for candidate in lookup_order(args[0]):
            if candidate in registry:
                return registry[candidate](*args, **kw)
        return func(*args, **kw)

    try:
        wrapper.__name__ = func.__name__
    except (TypeError, AttributeError):
        pass    # Python 2.3 doesn't allow functions to be renamed

    # The wrapper shares (not copies) func's attribute dictionary, so the
    # 'register' attribute set below is visible on func as well.
    wrapper.__dict__ = func.__dict__
    wrapper.__doc__ = func.__doc__
    wrapper.register = register
    return wrapper
66
67
def walk_packages(path=None, prefix='', onerror=None):
    """Recursively yield (module_loader, name, ispkg) for modules on path.

    'path' is either None, meaning all accessible modules, or a list of
    paths to search.

    'prefix' is a string prepended to every module name that is yielded.

    Every *package* found (but not every module) is imported so that its
    __path__ attribute can be read and its submodules visited.

    'onerror', when given, is called with the name of the package whose
    import raised an exception.  Without it, ImportError is silently
    ignored and any other exception propagates, ending the walk.

    Examples:

    # list all modules python can access
    walk_packages()

    # list all submodules of ctypes
    walk_packages(ctypes.__path__, ctypes.__name__+'.')
    """

    visited = set()

    def seen(p):
        # Report whether 'p' was encountered before, recording it if not.
        if p in visited:
            return True
        visited.add(p)
        return False

    for importer, name, ispkg in iter_modules(path, prefix):
        yield importer, name, ispkg

        if not ispkg:
            continue

        try:
            __import__(name)
        except Exception as exc:
            if onerror is not None:
                onerror(name)
            elif not isinstance(exc, ImportError):
                raise
            continue

        subpath = getattr(sys.modules[name], '__path__', None) or []

        # don't traverse path items we've seen before
        subpath = [p for p in subpath if not seen(p)]

        for item in walk_packages(subpath, name + '.', onerror):
            yield item
124
125
def iter_modules(path=None, prefix=''):
    """Yield (module_loader, name, ispkg) for each submodule on path.

    'path' is either None, in which case the importers produced by
    iter_importers() are consulted, or a list of paths to search.

    'prefix' is a string prepended to every module name that is yielded.

    A name reported by more than one importer is yielded only once, for
    the first importer that reports it.
    """

    importers = iter_importers() if path is None else map(get_importer, path)

    already_yielded = set()
    for importer in importers:
        for name, ispkg in iter_importer_modules(importer, prefix):
            if name in already_yielded:
                continue
            already_yielded.add(name)
            yield importer, name, ispkg
148
149
def iter_importer_modules(importer, prefix=''):
    """Return the (name, ispkg) pairs offered by 'importer'.

    This is the fallback implementation: it delegates to the importer's
    own iter_modules(prefix) method when there is one and otherwise
    returns an empty list.
    """
    if hasattr(importer, 'iter_modules'):
        return importer.iter_modules(prefix)
    return []

# Wrapped by an explicit call instead of decorator syntax.
iter_importer_modules = simplegeneric(iter_importer_modules)
157
158
class ImpImporter:
    """PEP 302 importer built on Python's "classic" import algorithm.

    ImpImporter(dirname) searches that single directory, while
    ImpImporter(None) searches the current sys.path plus any frozen or
    built-in modules.

    Placing an ImpImporter on sys.meta_path is not currently supported.
    """

    def __init__(self, path=None):
        self.path = path

    def find_module(self, fullname, path=None):
        """Return an ImpLoader for 'fullname', or None if it is not found."""
        # 'path' is ignored: it is only supplied via sys.meta_path.
        subname = fullname.rpartition(".")[2]
        if self.path is None:
            # Without a directory only top-level names can be located.
            if subname != fullname:
                return None
            search_path = None
        else:
            search_path = [os.path.realpath(self.path)]
        try:
            file, filename, etc = imp.find_module(subname, search_path)
        except ImportError:
            return None
        return ImpLoader(fullname, file, filename, etc)

    def iter_modules(self, prefix=''):
        """Yield (prefix + name, ispkg) for modules directly in self.path.

        Nothing is yielded when self.path is None or is not a directory.
        """
        if self.path is None or not os.path.isdir(self.path):
            return

        import inspect
        try:
            entries = os.listdir(self.path)
        except OSError:
            # ignore unreadable directories like import does
            entries = []
        entries.sort()  # handle packages before same-named modules

        reported = set()
        for entry in entries:
            modname = inspect.getmodulename(entry)
            if modname == '__init__' or modname in reported:
                continue

            full_path = os.path.join(self.path, entry)
            ispkg = False

            if not modname and os.path.isdir(full_path) and '.' not in entry:
                modname = entry
                try:
                    children = os.listdir(full_path)
                except OSError:
                    # ignore unreadable directories like import does
                    children = []
                # A directory counts as a package only if it holds an
                # __init__ module.
                ispkg = any(inspect.getmodulename(child) == '__init__'
                            for child in children)
                if not ispkg:
                    continue    # not a package

            if modname and '.' not in modname:
                reported.add(modname)
                yield prefix + modname, ispkg
227
228
class ImpLoader:
    """PEP 302 Loader that wraps Python's "classic" import algorithm

    An instance holds the (file, filename, etc) triple produced by
    imp.find_module() for one module, plus that module's full name.
    Compiled code and source text are cached on the instance after the
    first successful lookup.
    """
    # Class-level defaults; replaced per instance once computed.
    code = source = None

    def __init__(self, fullname, file, filename, etc):
        self.file = file
        self.filename = filename
        self.fullname = fullname
        self.etc = etc

    def load_module(self, fullname):
        """Load and return the module via imp.load_module().

        The underlying file, if any, is closed afterwards whether or not
        the load succeeded.
        """
        self._reopen()
        try:
            mod = imp.load_module(fullname, self.file, self.filename, self.etc)
        finally:
            if self.file:
                self.file.close()
        # Note: we don't set __loader__ because we want the module to look
        # normal; i.e. this is just a wrapper for standard import machinery
        return mod

    def get_data(self, pathname):
        """Return the raw contents of the file at 'pathname'."""
        with open(pathname, "rb") as file:
            return file.read()

    def _reopen(self):
        # Re-open self.file if it was closed by an earlier operation.
        # Source is opened in universal-newline text mode, compiled files
        # and C extensions in binary mode; other kinds are left alone.
        if self.file and self.file.closed:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                self.file = open(self.filename, 'rU')
            elif mod_type in (imp.PY_COMPILED, imp.C_EXTENSION):
                self.file = open(self.filename, 'rb')

    def _fix_name(self, fullname):
        # Default a missing name to this loader's module and reject any
        # other name with ImportError.
        if fullname is None:
            fullname = self.fullname
        elif fullname != self.fullname:
            raise ImportError("Loader for module %s cannot handle "
                              "module %s" % (self.fullname, fullname))
        return fullname

    def is_package(self, fullname):
        """Return True if this loader's module is a package directory."""
        fullname = self._fix_name(fullname)
        return self.etc[2] == imp.PKG_DIRECTORY

    def get_code(self, fullname=None):
        """Return the module's code object, or None if there is none.

        Source modules are compiled, compiled modules are read with
        read_code(), and packages delegate to their __init__ module.
        The result is cached on the instance.
        """
        fullname = self._fix_name(fullname)
        if self.code is None:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                source = self.get_source(fullname)
                self.code = compile(source, self.filename, 'exec')
            elif mod_type == imp.PY_COMPILED:
                self._reopen()
                try:
                    self.code = read_code(self.file)
                finally:
                    # Guarded like load_module() so that a missing file
                    # object cannot mask the original error.
                    if self.file:
                        self.file.close()
            elif mod_type == imp.PKG_DIRECTORY:
                self.code = self._get_delegate().get_code()
        return self.code

    def get_source(self, fullname=None):
        """Return the module's source text, or None if it is unavailable.

        For a compiled module the source is looked for next to it, at the
        compiled file's name with its last character removed.  Packages
        delegate to their __init__ module.  The result is cached on the
        instance.
        """
        fullname = self._fix_name(fullname)
        if self.source is None:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                self._reopen()
                try:
                    self.source = self.file.read()
                finally:
                    if self.file:
                        self.file.close()
            elif mod_type == imp.PY_COMPILED:
                source_path = self.filename[:-1]
                if os.path.exists(source_path):
                    # 'with' guarantees the handle is released even if
                    # reading fails part-way.
                    with open(source_path, 'rU') as f:
                        self.source = f.read()
            elif mod_type == imp.PKG_DIRECTORY:
                self.source = self._get_delegate().get_source()
        return self.source

    def _get_delegate(self):
        # Loader for the __init__ module inside this package's directory.
        return ImpImporter(self.filename).find_module('__init__')

    def get_filename(self, fullname=None):
        """Return the module's file name, or None for other module kinds.

        Packages report the file name of their __init__ module.
        """
        fullname = self._fix_name(fullname)
        mod_type = self.etc[2]
        if mod_type == imp.PKG_DIRECTORY:
            return self._get_delegate().get_filename()
        elif mod_type in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION):
            return self.filename
        return None
323
324
try:
    import zipimport
    from zipimport import zipimporter

    def iter_zipimport_modules(importer, prefix=''):
        """Yield (prefix + name, ispkg) for modules a zipimporter can see.

        Only entries of the archive's cached directory that start with
        importer.prefix are considered, and only those directly at that
        level: a directory holding an __init__.py* file is reported as a
        package, any other recognised module file as a plain module.
        Each name is yielded at most once.
        """
        # sorted() accepts any iterable of keys, unlike keys()+sort().
        dirlist = sorted(zipimport._zip_directory_cache[importer.archive])
        _prefix = importer.prefix
        plen = len(_prefix)
        yielded = {}
        import inspect
        for fn in dirlist:
            if not fn.startswith(_prefix):
                continue

            # Path components relative to the importer's own prefix.
            fn = fn[plen:].split(os.sep)

            if len(fn) == 2 and fn[1].startswith('__init__.py'):
                if fn[0] not in yielded:
                    yielded[fn[0]] = 1
                    # Apply 'prefix' to packages too, as is done for
                    # plain modules below.
                    yield prefix + fn[0], True

            if len(fn) != 1:
                continue

            modname = inspect.getmodulename(fn[0])
            if modname == '__init__':
                continue

            if modname and '.' not in modname and modname not in yielded:
                yielded[modname] = 1
                yield prefix + modname, False

    iter_importer_modules.register(zipimporter, iter_zipimport_modules)

except ImportError:
    # zipimport is unavailable: no zip support is registered.
    pass
362
363
def get_importer(path_item):
    """Return a PEP 302 importer for 'path_item'.

    sys.path_importer_cache is consulted first.  On a miss, each hook in
    sys.path_hooks is tried in turn and the first result that does not
    raise ImportError is stored in the cache; None is stored when every
    hook refuses.

    When the resulting importer is None, an ImpImporter wrapping the basic
    import machinery is returned instead.  That wrapper is never placed in
    the cache.

    Clear the cache (or the relevant part of it) by hand if sys.path_hooks
    needs to be rescanned.
    """
    try:
        found = sys.path_importer_cache[path_item]
    except KeyError:
        found = None
        for hook in sys.path_hooks:
            try:
                found = hook(path_item)
            except ImportError:
                continue
            break
        sys.path_importer_cache.setdefault(path_item, found)

    if found is not None:
        return found

    try:
        return ImpImporter(path_item)
    except ImportError:
        return None
396
397
def iter_importers(fullname=""):
    """Yield PEP 302 importers for the given module name.

    For a dotted name, the importers are those for the __path__ of the
    containing package, which is imported as a side effect if it is not
    already in sys.modules.  Otherwise the importers come from
    sys.meta_path, then sys.path, then Python's "classic" import
    machinery, in that order.

    Names starting with '.' raise ImportError.

    Non PEP 302 mechanisms used by the standard import machinery to find
    files in alternative locations (e.g. the Windows registry) are only
    partially supported: they are searched AFTER sys.path, whereas the
    builtin import searches them BEFORE sys.path.  The difference is
    visible only when the same module or package name is reachable both
    through sys.path and through one of those mechanisms; this emulation
    then finds the sys.path version and the builtin import finds the
    other.

    Items of the following types can be affected by this discrepancy:
        imp.C_EXTENSION, imp.PY_SOURCE, imp.PY_COMPILED, imp.PKG_DIRECTORY
    """
    if fullname.startswith('.'):
        raise ImportError("Relative module names not supported")

    in_package = '.' in fullname
    if in_package:
        # Search the containing package's __path__.
        pkg = fullname.rpartition('.')[0]
        if pkg not in sys.modules:
            __import__(pkg)
        search = getattr(sys.modules[pkg], '__path__', None) or []
    else:
        for finder in sys.meta_path:
            yield finder
        search = sys.path

    for item in search:
        yield get_importer(item)

    if not in_package:
        yield ImpImporter()
438
def get_loader(module_or_name):
    """Return a PEP 302 "loader" object for a module or module name.

    A name found in sys.modules is first replaced by the module it maps
    to.  For a module object, its __loader__ attribute is returned when
    that is set; otherwise the lookup falls through to find_loader() with
    the module's __name__.  Anything that is not a module object is passed
    to find_loader() as the name.

    Returns None if the module cannot be found or imported.  Looking up a
    module that is not yet imported imports its containing package (if
    any) in order to establish the package __path__.

    Because find_loader() relies on iter_importers(), the same limitations
    apply regarding platform-specific special import locations such as the
    Windows registry.
    """
    target = sys.modules.get(module_or_name, module_or_name)
    if not isinstance(target, ModuleType):
        return find_loader(target)

    loader = getattr(target, '__loader__', None)
    if loader is not None:
        return loader
    return find_loader(target.__name__)
463
def find_loader(fullname):
    """Return a PEP 302 "loader" object for 'fullname', or None.

    Each importer produced by iter_importers(fullname) is asked for the
    module through find_module(); the first loader that is not None is
    returned.  None means no importer could find the module.

    Because iter_importers() is used, the same limitations apply regarding
    platform-specific special import locations such as the Windows
    registry.
    """
    for importer in iter_importers(fullname):
        loader = importer.find_module(fullname)
        if loader is None:
            continue
        return loader

    return None
478
479
def extend_path(path, name):
    """Extend a package's path.

    Intended use is to place the following code in a package's __init__.py:

        from pkgutil import extend_path
        __path__ = extend_path(__path__, __name__)

    For every directory on sys.path, the subdirectory named after the
    package is appended to the returned path if it contains an __init__.py
    file and is not already present.  This is useful if one wants to
    distribute different parts of a single logical package as multiple
    directories.

    Each sys.path directory is also checked for a file called
    <name>.pkg.  This feature is similar to *.pth files (see site.py),
    except that it doesn't special-case lines starting with 'import'.
    A *.pkg file is trusted at face value: every line that is neither
    empty nor starts with '#' is appended to the path, regardless of
    whether it exists on the filesystem.  (This is a feature.)  A *.pkg
    file that cannot be opened is reported on sys.stderr and skipped.

    If the input path is not a list (as is the case for frozen
    packages) it is returned unchanged.  The input path is not
    modified; an extended copy is returned.  Items are only appended
    to the copy at the end.

    It is assumed that sys.path is a sequence.  Items of sys.path that
    are not strings referring to existing directories are ignored.
    Unicode items of sys.path that cause errors when used as filenames
    may cause this function to raise an exception (in line with
    os.path.isdir() behavior).
    """

    if not isinstance(path, list):
        # This could happen e.g. when this is called from inside a
        # frozen package.  Return the path unchanged in that case.
        return path

    # Accept both 8-bit and unicode strings where 'basestring' exists,
    # and plain 'str' where it does not.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    name_parts = name.split('.')
    pname = os.path.join(*name_parts) # Reconstitute as relative path
    # Just in case os.extsep != '.'
    sname_pkg = os.extsep.join(name_parts) + os.extsep + "pkg"
    init_py = "__init__" + os.extsep + "py"

    path = path[:] # Start with a copy of the existing path

    for entry in sys.path:
        if not isinstance(entry, string_types) or not os.path.isdir(entry):
            continue
        subdir = os.path.join(entry, pname)
        # XXX This may still add duplicate entries to path on
        # case-insensitive filesystems
        initfile = os.path.join(subdir, init_py)
        if subdir not in path and os.path.isfile(initfile):
            path.append(subdir)
        # XXX Is this the right thing for subpackages like zope.app?
        # It looks for a file named "zope.app.pkg"
        pkgfile = os.path.join(entry, sname_pkg)
        if os.path.isfile(pkgfile):
            try:
                f = open(pkgfile)
            except IOError as msg:
                sys.stderr.write("Can't open %s: %s\n" %
                                 (pkgfile, msg))
            else:
                # 'with' closes the file even if reading a line fails.
                with f:
                    for line in f:
                        line = line.rstrip('\n')
                        if not line or line.startswith('#'):
                            continue
                        path.append(line) # Don't check for existence!

    return path
553
def get_data(package, resource):
    """Get a resource from a package.

    This wraps the PEP 302 loader get_data API.  'package' is the name of
    a package in standard module format (foo.bar).  'resource' is a
    relative filename that uses '/' as the path separator; the parent
    directory name '..' is not allowed, and neither is a rooted name
    (starting with a '/').

    The return value is a binary string holding the contents of the
    specified resource.

    For packages located in the filesystem, which have already been
    imported, this is the rough equivalent of

        d = os.path.dirname(sys.modules[package].__file__)
        data = open(os.path.join(d, resource), 'rb').read()

    None is returned if the package cannot be located or loaded, or if it
    uses a PEP 302 loader which does not support get_data().
    """

    loader = get_loader(package)
    if loader is None or not hasattr(loader, 'get_data'):
        return None

    # Prefer the already-imported module; load it only when needed.
    module = sys.modules.get(package)
    if not module:
        module = loader.load_module(package)
    if module is None or not hasattr(module, '__file__'):
        return None

    # loader.get_data expects an os.path style filename that starts with
    # the directory of the package's __file__.
    base_dir = os.path.dirname(module.__file__)
    return loader.get_data(os.path.join(base_dir, *resource.split('/')))
590