"""Utilities to support packages."""

from collections import namedtuple
from functools import singledispatch as simplegeneric
import importlib
import importlib.util
import importlib.machinery
import os
import os.path
import sys
from types import ModuleType
import warnings

__all__ = [
    'get_importer', 'iter_importers', 'get_loader', 'find_loader',
    'walk_packages', 'iter_modules', 'get_data',
    'ImpImporter', 'ImpLoader', 'read_code', 'extend_path',
    'ModuleInfo',
]


ModuleInfo = namedtuple('ModuleInfo', 'module_finder name ispkg')
ModuleInfo.__doc__ = 'A namedtuple with minimal info about a module.'


def _get_spec(finder, name):
    """Return the finder-specific module spec.

    Finders exposing find_spec() are asked directly.  Legacy finders that
    only provide find_module() have a spec built from the loader they
    return; None is returned when such a finder finds nothing.
    """
    # Works with legacy finders.
    try:
        find_spec = finder.find_spec
    except AttributeError:
        loader = finder.find_module(name)
        if loader is None:
            return None
        return importlib.util.spec_from_loader(name, loader)
    else:
        return find_spec(name)


def read_code(stream):
    """Return the object unmarshalled from a compiled-file stream.

    The first four bytes of 'stream' must equal
    importlib.util.MAGIC_NUMBER; otherwise None is returned.  The next
    twelve bytes (the rest of the header) are skipped before the payload
    is loaded with marshal.
    """
    # This helper is needed in order for the PEP 302 emulation to
    # correctly handle compiled files
    import marshal

    magic = stream.read(4)
    if magic != importlib.util.MAGIC_NUMBER:
        return None

    stream.read(12)  # Skip rest of the header
    return marshal.load(stream)


def walk_packages(path=None, prefix='', onerror=None):
    """Yields ModuleInfo for all modules recursively
    on path, or, if path is None, all accessible modules.

    'path' should be either None or a list of paths to look for
    modules in.

    'prefix' is a string to output on the front of every module name
    on output.

    Note that this function must import all *packages* (NOT all
    modules!) on the given path, in order to access the __path__
    attribute to find submodules.

    'onerror' is a function which gets called with one argument (the
    name of the package which was being imported) if any exception
    occurs while trying to import a package.  If no onerror function is
    supplied, ImportErrors are caught and ignored, while all other
    exceptions are propagated, terminating the search.

    Examples:

    # list all modules python can access
    walk_packages()

    # list all submodules of ctypes
    walk_packages(ctypes.__path__, ctypes.__name__+'.')
    """

    # Path entries already traversed by *this* invocation.  Each recursive
    # invocation keeps its own set.
    seen_paths = set()

    def seen(p):
        if p in seen_paths:
            return True
        seen_paths.add(p)
        return False

    for info in iter_modules(path, prefix):
        yield info

        if info.ispkg:
            try:
                __import__(info.name)
            except ImportError:
                if onerror is not None:
                    onerror(info.name)
            except Exception:
                if onerror is not None:
                    onerror(info.name)
                else:
                    raise
            else:
                subpath = getattr(sys.modules[info.name], '__path__', None) or []

                # don't traverse path items we've seen before
                subpath = [p for p in subpath if not seen(p)]

                yield from walk_packages(subpath, info.name+'.', onerror)


def iter_modules(path=None, prefix=''):
    """Yields ModuleInfo for all submodules on path,
    or, if path is None, all top-level modules on sys.path.

    'path' should be either None or a list of paths to look for
    modules in.

    'prefix' is a string to output on the front of every module name
    on output.

    Raises ValueError if 'path' is a string rather than a list of paths.
    """
    if path is None:
        importers = iter_importers()
    elif isinstance(path, str):
        raise ValueError("path must be None or list of paths to look for "
                         "modules in")
    else:
        importers = map(get_importer, path)

    # Names already reported; the first importer providing a name wins.
    yielded = set()
    for i in importers:
        for name, ispkg in iter_importer_modules(i, prefix):
            if name not in yielded:
                yielded.add(name)
                yield ModuleInfo(i, name, ispkg)


@simplegeneric
def iter_importer_modules(importer, prefix=''):
    """Return an iterable of (name, ispkg) pairs for 'importer'.

    This is the generic fallback: it delegates to the importer's own
    iter_modules(prefix) method when there is one and returns an empty
    list otherwise.  Specialised implementations are registered per
    importer type.
    """
    if not hasattr(importer, 'iter_modules'):
        return []
    return importer.iter_modules(prefix)


def _iter_directory_modules(dirpath, prefix):
    """Yield (prefix + name, ispkg) for the modules found in 'dirpath'.

    Nothing is yielded when 'dirpath' is None or is not a directory.
    A subdirectory is reported (as a package) only if it contains an
    entry whose module name is '__init__'.  Each name is yielded at most
    once.
    """
    if dirpath is None or not os.path.isdir(dirpath):
        return

    yielded = set()
    import inspect
    try:
        filenames = os.listdir(dirpath)
    except OSError:
        # ignore unreadable directories like import does
        filenames = []
    filenames.sort()  # handle packages before same-named modules

    for fn in filenames:
        modname = inspect.getmodulename(fn)
        if modname == '__init__' or modname in yielded:
            continue

        path = os.path.join(dirpath, fn)
        ispkg = False

        if not modname and os.path.isdir(path) and '.' not in fn:
            modname = fn
            try:
                dircontents = os.listdir(path)
            except OSError:
                # ignore unreadable directories like import does
                dircontents = []
            ispkg = any(inspect.getmodulename(entry) == '__init__'
                        for entry in dircontents)
            if not ispkg:
                continue    # not a package

        if modname and '.' not in modname:
            yielded.add(modname)
            yield prefix + modname, ispkg


# Implement a file walker for the normal importlib path hook
def _iter_file_finder_modules(importer, prefix=''):
    """Yield (name, ispkg) pairs for the directory at importer.path."""
    yield from _iter_directory_modules(importer.path, prefix)

iter_importer_modules.register(
    importlib.machinery.FileFinder, _iter_file_finder_modules)


def _import_imp():
    """Import the 'imp' module into the global name 'imp'.

    DeprecationWarning is suppressed while the import runs.
    """
    global imp
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', DeprecationWarning)
        imp = importlib.import_module('imp')

class ImpImporter:
    """PEP 302 Finder that wraps Python's "classic" import algorithm

    ImpImporter(dirname) produces a PEP 302 finder that searches that
    directory.  ImpImporter(None) produces a PEP 302 finder that searches
    the current sys.path, plus any modules that are frozen or built-in.

    Note that ImpImporter does not currently support being used by placement
    on sys.meta_path.
    """

    def __init__(self, path=None):
        warnings.warn("This emulation is deprecated and slated for removal "
                      "in Python 3.12; use 'importlib' instead",
             DeprecationWarning)
        _import_imp()
        self.path = path

    def find_module(self, fullname, path=None):
        """Return an ImpLoader for 'fullname', or None if it is not found.

        Only the last dotted component of 'fullname' is searched for.  A
        dotted name is never found by a finder created with path=None.
        """
        # Note: we ignore 'path' argument since it is only used via meta_path
        subname = fullname.split(".")[-1]
        if subname != fullname and self.path is None:
            return None
        if self.path is None:
            path = None
        else:
            path = [os.path.realpath(self.path)]
        try:
            file, filename, etc = imp.find_module(subname, path)
        except ImportError:
            return None
        return ImpLoader(fullname, file, filename, etc)

    def iter_modules(self, prefix=''):
        """Yield (name, ispkg) pairs for the directory at self.path."""
        yield from _iter_directory_modules(self.path, prefix)


class ImpLoader:
    """PEP 302 Loader that wraps Python's "classic" import algorithm
    """
    # Per-instance caches filled in by get_code() / get_source().
    code = source = None

    def __init__(self, fullname, file, filename, etc):
        warnings.warn("This emulation is deprecated and slated for removal in "
                      "Python 3.12; use 'importlib' instead",
                      DeprecationWarning)
        _import_imp()
        self.file = file
        self.filename = filename
        self.fullname = fullname
        self.etc = etc

    def load_module(self, fullname):
        """Load and return the module via imp.load_module().

        The underlying file (if any) is closed afterwards, whether or not
        loading succeeded.
        """
        self._reopen()
        try:
            mod = imp.load_module(fullname, self.file, self.filename, self.etc)
        finally:
            if self.file:
                self.file.close()
        # Note: we don't set __loader__ because we want the module to look
        # normal; i.e. this is just a wrapper for standard import machinery
        return mod

    def get_data(self, pathname):
        """Return the contents of the file 'pathname' as bytes."""
        with open(pathname, "rb") as file:
            return file.read()

    def _reopen(self):
        """Reopen self.file if it exists but has been closed.

        Source files are opened in text mode; compiled files and C
        extensions in binary mode.
        """
        if self.file and self.file.closed:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                self.file = open(self.filename, 'r')
            elif mod_type in (imp.PY_COMPILED, imp.C_EXTENSION):
                self.file = open(self.filename, 'rb')

    def _fix_name(self, fullname):
        """Return the module name to operate on.

        None means "the module this loader was created for".  Any other
        name that differs from self.fullname raises ImportError.
        """
        if fullname is None:
            fullname = self.fullname
        elif fullname != self.fullname:
            raise ImportError("Loader for module %s cannot handle "
                              "module %s" % (self.fullname, fullname))
        return fullname

    def is_package(self, fullname):
        """Return True if this loader's module is a package directory."""
        fullname = self._fix_name(fullname)
        return self.etc[2] == imp.PKG_DIRECTORY

    def get_code(self, fullname=None):
        """Return the module's code object, computing it on first use.

        The result is cached on the instance.  It stays None for module
        types other than source, compiled and package directory.
        """
        fullname = self._fix_name(fullname)
        if self.code is None:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                source = self.get_source(fullname)
                self.code = compile(source, self.filename, 'exec')
            elif mod_type == imp.PY_COMPILED:
                self._reopen()
                try:
                    self.code = read_code(self.file)
                finally:
                    self.file.close()
            elif mod_type == imp.PKG_DIRECTORY:
                self.code = self._get_delegate().get_code()
        return self.code

    def get_source(self, fullname=None):
        """Return the module's source text, reading it on first use.

        The result is cached on the instance.  For a compiled module the
        source is looked for at the compiled filename minus its last
        character; None is returned if no source is available.
        """
        fullname = self._fix_name(fullname)
        if self.source is None:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                self._reopen()
                try:
                    self.source = self.file.read()
                finally:
                    self.file.close()
            elif mod_type == imp.PY_COMPILED:
                if os.path.exists(self.filename[:-1]):
                    with open(self.filename[:-1], 'r') as f:
                        self.source = f.read()
            elif mod_type == imp.PKG_DIRECTORY:
                self.source = self._get_delegate().get_source()
        return self.source

    def _get_delegate(self):
        """Return a loader for the '__init__' module of this package."""
        finder = ImpImporter(self.filename)
        spec = _get_spec(finder, '__init__')
        return spec.loader

    def get_filename(self, fullname=None):
        """Return the file name of the module, or None if it has none.

        For a package directory this is the file name reported by the
        loader of its '__init__' module.
        """
        fullname = self._fix_name(fullname)
        mod_type = self.etc[2]
        if mod_type == imp.PKG_DIRECTORY:
            return self._get_delegate().get_filename()
        elif mod_type in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION):
            return self.filename
        return None


try:
    import zipimport
    from zipimport import zipimporter

    def iter_zipimport_modules(importer, prefix=''):
        """Yield (name, ispkg) pairs for the modules inside a zip archive.

        Only entries located directly under importer.prefix are reported.
        """
        dirlist = sorted(zipimport._zip_directory_cache[importer.archive])
        _prefix = importer.prefix
        plen = len(_prefix)
        yielded = set()
        import inspect
        for fn in dirlist:
            if not fn.startswith(_prefix):
                continue

            fn = fn[plen:].split(os.sep)

            # "<name>/__init__.py*" marks <name> as a package.
            if len(fn) == 2 and fn[1].startswith('__init__.py'):
                if fn[0] not in yielded:
                    yielded.add(fn[0])
                    yield prefix + fn[0], True

            if len(fn) != 1:
                continue

            modname = inspect.getmodulename(fn[0])
            if modname == '__init__':
                continue

            if modname and '.' not in modname and modname not in yielded:
                yielded.add(modname)
                yield prefix + modname, False

    iter_importer_modules.register(zipimporter, iter_zipimport_modules)

except ImportError:
    pass


def get_importer(path_item):
    """Retrieve a finder for the given path item

    The returned finder is cached in sys.path_importer_cache
    if it was newly created by a path hook.

    The cache (or part of it) can be cleared manually if a
    rescan of sys.path_hooks is necessary.

    Returns None if no path hook accepts the path item.
    """
    try:
        importer = sys.path_importer_cache[path_item]
    except KeyError:
        for path_hook in sys.path_hooks:
            try:
                importer = path_hook(path_item)
                sys.path_importer_cache.setdefault(path_item, importer)
                break
            except ImportError:
                pass
        else:
            importer = None
    return importer


def iter_importers(fullname=""):
    """Yield finders for the given module name

    If fullname contains a '.', the finders will be for the package
    containing fullname, otherwise they will be all registered top level
    finders (i.e. those on both sys.meta_path and sys.path_hooks).

    If the named module is in a package, that package is imported as a side
    effect of invoking this function.

    If no module name is specified, all top level finders are produced.

    Raises ImportError for a relative module name (one starting with '.').
    """
    if fullname.startswith('.'):
        msg = "Relative module name {!r} not supported".format(fullname)
        raise ImportError(msg)
    if '.' in fullname:
        # Get the containing package's __path__
        pkg_name = fullname.rpartition(".")[0]
        pkg = importlib.import_module(pkg_name)
        path = getattr(pkg, '__path__', None)
        if path is None:
            return
    else:
        yield from sys.meta_path
        path = sys.path
    for item in path:
        yield get_importer(item)


def get_loader(module_or_name):
    """Get a "loader" object for module_or_name

    Returns None if the module cannot be found or imported.
    If the named module is not already imported, its containing package
    (if any) is imported, in order to establish the package __path__.
    """
    if module_or_name in sys.modules:
        module_or_name = sys.modules[module_or_name]
        if module_or_name is None:
            return None
    if isinstance(module_or_name, ModuleType):
        module = module_or_name
        loader = getattr(module, '__loader__', None)
        if loader is not None:
            return loader
        if getattr(module, '__spec__', None) is None:
            return None
        fullname = module.__name__
    else:
        fullname = module_or_name
    return find_loader(fullname)


def find_loader(fullname):
    """Find a "loader" object for fullname

    This is a backwards compatibility wrapper around
    importlib.util.find_spec that converts most failures to ImportError
    and only returns the loader rather than the full spec
    """
    if fullname.startswith('.'):
        msg = "Relative module name {!r} not supported".format(fullname)
        raise ImportError(msg)
    try:
        spec = importlib.util.find_spec(fullname)
    except (ImportError, AttributeError, TypeError, ValueError) as ex:
        # This hack fixes an impedance mismatch between pkgutil and
        # importlib, where the latter raises other errors for cases where
        # pkgutil previously raised ImportError
        msg = "Error while finding loader for {!r} ({}: {})"
        raise ImportError(msg.format(fullname, type(ex), ex)) from ex
    return spec.loader if spec is not None else None


def extend_path(path, name):
    """Extend a package's path.

    Intended use is to place the following code in a package's __init__.py:

        from pkgutil import extend_path
        __path__ = extend_path(__path__, __name__)

    This will add to the package's __path__ all subdirectories of
    directories on sys.path named after the package.  This is useful
    if one wants to distribute different parts of a single logical
    package as multiple directories.

    It also looks for *.pkg files beginning where * matches the name
    argument.  This feature is similar to *.pth files (see site.py),
    except that it doesn't special-case lines starting with 'import'.
    A *.pkg file is trusted at face value: apart from checking for
    duplicates, all entries found in a *.pkg file are added to the
    path, regardless of whether they exist on the filesystem.  (This
    is a feature.)

    If the input path is not a list (as is the case for frozen
    packages) it is returned unchanged.  The input path is not
    modified; an extended copy is returned.  Items are only appended
    to the copy at the end.

    It is assumed that sys.path is a sequence.  Items of sys.path that
    are not (unicode or 8-bit) strings referring to existing
    directories are ignored.  Unicode items of sys.path that cause
    errors when used as filenames may cause this function to raise an
    exception (in line with os.path.isdir() behavior).
    """

    if not isinstance(path, list):
        # This could happen e.g. when this is called from inside a
        # frozen package.  Return the path unchanged in that case.
        return path

    sname_pkg = name + ".pkg"

    path = path[:]  # Start with a copy of the existing path

    parent_package, _, final_name = name.rpartition('.')
    if parent_package:
        try:
            search_path = sys.modules[parent_package].__path__
        except (KeyError, AttributeError):
            # We can't do anything: find_loader() returns None when
            # passed a dotted name.
            return path
    else:
        search_path = sys.path

    for dirname in search_path:
        if not isinstance(dirname, str):
            continue

        finder = get_importer(dirname)
        if finder is not None:
            portions = []
            if hasattr(finder, 'find_spec'):
                spec = finder.find_spec(final_name)
                if spec is not None:
                    portions = spec.submodule_search_locations or []
            # Is this finder PEP 420 compliant?
            elif hasattr(finder, 'find_loader'):
                _, portions = finder.find_loader(final_name)

            for portion in portions:
                # XXX This may still add duplicate entries to path on
                # case-insensitive filesystems
                if portion not in path:
                    path.append(portion)

        # XXX Is this the right thing for subpackages like zope.app?
        # It looks for a file named "zope.app.pkg"
        pkgfile = os.path.join(dirname, sname_pkg)
        if os.path.isfile(pkgfile):
            try:
                f = open(pkgfile)
            except OSError as exc:
                sys.stderr.write("Can't open %s: %s\n" %
                                 (pkgfile, exc))
            else:
                with f:
                    for line in f:
                        line = line.rstrip('\n')
                        if not line or line.startswith('#'):
                            continue
                        # Don't check for existence, but do honour the
                        # documented promise to skip duplicates.
                        if line not in path:
                            path.append(line)

    return path


def get_data(package, resource):
    """Get a resource from a package.

    This is a wrapper round the PEP 302 loader get_data API. The package
    argument should be the name of a package, in standard module format
    (foo.bar). The resource argument should be in the form of a relative
    filename, using '/' as the path separator. The parent directory name '..'
    is not allowed, and nor is a rooted name (starting with a '/').

    The function returns a binary string, which is the contents of the
    specified resource.

    For packages located in the filesystem, which have already been imported,
    this is the rough equivalent of

        d = os.path.dirname(sys.modules[package].__file__)
        data = open(os.path.join(d, resource), 'rb').read()

    If the package cannot be located or loaded, or it uses a PEP 302 loader
    which does not support get_data(), then None is returned.
    """

    spec = importlib.util.find_spec(package)
    if spec is None:
        return None
    loader = spec.loader
    if loader is None or not hasattr(loader, 'get_data'):
        return None
    # XXX needs test
    mod = (sys.modules.get(package) or
           importlib._bootstrap._load(spec))
    if mod is None or not hasattr(mod, '__file__'):
        return None

    # Modify the resource name to be compatible with the loader.get_data
    # signature - an os.path format "filename" starting with the dirname of
    # the package's __file__
    parts = resource.split('/')
    parts.insert(0, os.path.dirname(mod.__file__))
    resource_name = os.path.join(*parts)
    return loader.get_data(resource_name)


_NAME_PATTERN = None

def resolve_name(name):
    """
    Resolve a name to an object.

    It is expected that `name` will be a string in one of the following
    formats, where W is shorthand for a valid Python identifier and dot stands
    for a literal period in these pseudo-regexes:

    W(.W)*
    W(.W)*:(W(.W)*)?

    The first form is intended for backward compatibility only. It assumes that
    some part of the dotted name is a package, and the rest is an object
    somewhere within that package, possibly nested inside other objects.
    Because the place where the package stops and the object hierarchy starts
    can't be inferred by inspection, repeated attempts to import must be done
    with this form.

    In the second form, the caller makes the division point clear through the
    provision of a single colon: the dotted name to the left of the colon is a
    package to be imported, and the dotted name to the right is the object
    hierarchy within that package. Only one import is needed in this form. If
    it ends with the colon, then a module object is returned.

    The function will return an object (which might be a module), or raise one
    of the following exceptions:

    ValueError - if `name` isn't in a recognised format
    ImportError - if an import failed when it shouldn't have
    AttributeError - if a failure occurred when traversing the object hierarchy
                     within the imported package to get to the desired object.
    """
    global _NAME_PATTERN
    if _NAME_PATTERN is None:
        # Lazy import to speedup Python startup time
        import re
        dotted_words = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
        _NAME_PATTERN = re.compile(f'^(?P<pkg>{dotted_words})'
                                   f'(?P<cln>:(?P<obj>{dotted_words})?)?$',
                                   re.UNICODE)

    m = _NAME_PATTERN.match(name)
    if not m:
        raise ValueError(f'invalid format: {name!r}')
    gd = m.groupdict()
    if gd.get('cln'):
        # there is a colon - a one-step import is all that's needed
        mod = importlib.import_module(gd['pkg'])
        parts = gd.get('obj')
        parts = parts.split('.') if parts else []
    else:
        # no colon - have to iterate to find the package boundary
        parts = name.split('.')
        modname = parts.pop(0)
        # first part *must* be a module/package.
        mod = importlib.import_module(modname)
        while parts:
            p = parts[0]
            s = f'{modname}.{p}'
            try:
                mod = importlib.import_module(s)
                parts.pop(0)
                modname = s
            except ImportError:
                break
    # if we reach this point, mod is the module, already imported, and
    # parts is the list of parts in the object hierarchy to be traversed, or
    # an empty list if just the module is wanted.
    result = mod
    for p in parts:
        result = getattr(result, p)
    return result