1"""Utilities to support packages.""" 2 3from collections import namedtuple 4from functools import singledispatch as simplegeneric 5import importlib 6import importlib.util 7import importlib.machinery 8import os 9import os.path 10import re 11import sys 12from types import ModuleType 13import warnings 14 15__all__ = [ 16 'get_importer', 'iter_importers', 'get_loader', 'find_loader', 17 'walk_packages', 'iter_modules', 'get_data', 18 'ImpImporter', 'ImpLoader', 'read_code', 'extend_path', 19 'ModuleInfo', 20] 21 22 23ModuleInfo = namedtuple('ModuleInfo', 'module_finder name ispkg') 24ModuleInfo.__doc__ = 'A namedtuple with minimal info about a module.' 25 26 27def _get_spec(finder, name): 28 """Return the finder-specific module spec.""" 29 # Works with legacy finders. 30 try: 31 find_spec = finder.find_spec 32 except AttributeError: 33 loader = finder.find_module(name) 34 if loader is None: 35 return None 36 return importlib.util.spec_from_loader(name, loader) 37 else: 38 return find_spec(name) 39 40 41def read_code(stream): 42 # This helper is needed in order for the PEP 302 emulation to 43 # correctly handle compiled files 44 import marshal 45 46 magic = stream.read(4) 47 if magic != importlib.util.MAGIC_NUMBER: 48 return None 49 50 stream.read(12) # Skip rest of the header 51 return marshal.load(stream) 52 53 54def walk_packages(path=None, prefix='', onerror=None): 55 """Yields ModuleInfo for all modules recursively 56 on path, or, if path is None, all accessible modules. 57 58 'path' should be either None or a list of paths to look for 59 modules in. 60 61 'prefix' is a string to output on the front of every module name 62 on output. 63 64 Note that this function must import all *packages* (NOT all 65 modules!) on the given path, in order to access the __path__ 66 attribute to find submodules. 67 68 'onerror' is a function which gets called with one argument (the 69 name of the package which was being imported) if any exception 70 occurs while trying to import a package. If no onerror function is 71 supplied, ImportErrors are caught and ignored, while all other 72 exceptions are propagated, terminating the search. 73 74 Examples: 75 76 # list all modules python can access 77 walk_packages() 78 79 # list all submodules of ctypes 80 walk_packages(ctypes.__path__, ctypes.__name__+'.') 81 """ 82 83 def seen(p, m={}): 84 if p in m: 85 return True 86 m[p] = True 87 88 for info in iter_modules(path, prefix): 89 yield info 90 91 if info.ispkg: 92 try: 93 __import__(info.name) 94 except ImportError: 95 if onerror is not None: 96 onerror(info.name) 97 except Exception: 98 if onerror is not None: 99 onerror(info.name) 100 else: 101 raise 102 else: 103 path = getattr(sys.modules[info.name], '__path__', None) or [] 104 105 # don't traverse path items we've seen before 106 path = [p for p in path if not seen(p)] 107 108 yield from walk_packages(path, info.name+'.', onerror) 109 110 111def iter_modules(path=None, prefix=''): 112 """Yields ModuleInfo for all submodules on path, 113 or, if path is None, all top-level modules on sys.path. 114 115 'path' should be either None or a list of paths to look for 116 modules in. 117 118 'prefix' is a string to output on the front of every module name 119 on output. 120 """ 121 if path is None: 122 importers = iter_importers() 123 elif isinstance(path, str): 124 raise ValueError("path must be None or list of paths to look for " 125 "modules in") 126 else: 127 importers = map(get_importer, path) 128 129 yielded = {} 130 for i in importers: 131 for name, ispkg in iter_importer_modules(i, prefix): 132 if name not in yielded: 133 yielded[name] = 1 134 yield ModuleInfo(i, name, ispkg) 135 136 137@simplegeneric 138def iter_importer_modules(importer, prefix=''): 139 if not hasattr(importer, 'iter_modules'): 140 return [] 141 return importer.iter_modules(prefix) 142 143 144# Implement a file walker for the normal importlib path hook 145def _iter_file_finder_modules(importer, prefix=''): 146 if importer.path is None or not os.path.isdir(importer.path): 147 return 148 149 yielded = {} 150 import inspect 151 try: 152 filenames = os.listdir(importer.path) 153 except OSError: 154 # ignore unreadable directories like import does 155 filenames = [] 156 filenames.sort() # handle packages before same-named modules 157 158 for fn in filenames: 159 modname = inspect.getmodulename(fn) 160 if modname=='__init__' or modname in yielded: 161 continue 162 163 path = os.path.join(importer.path, fn) 164 ispkg = False 165 166 if not modname and os.path.isdir(path) and '.' not in fn: 167 modname = fn 168 try: 169 dircontents = os.listdir(path) 170 except OSError: 171 # ignore unreadable directories like import does 172 dircontents = [] 173 for fn in dircontents: 174 subname = inspect.getmodulename(fn) 175 if subname=='__init__': 176 ispkg = True 177 break 178 else: 179 continue # not a package 180 181 if modname and '.' not in modname: 182 yielded[modname] = 1 183 yield prefix + modname, ispkg 184 185iter_importer_modules.register( 186 importlib.machinery.FileFinder, _iter_file_finder_modules) 187 188 189def _import_imp(): 190 global imp 191 with warnings.catch_warnings(): 192 warnings.simplefilter('ignore', DeprecationWarning) 193 imp = importlib.import_module('imp') 194 195class ImpImporter: 196 """PEP 302 Finder that wraps Python's "classic" import algorithm 197 198 ImpImporter(dirname) produces a PEP 302 finder that searches that 199 directory. ImpImporter(None) produces a PEP 302 finder that searches 200 the current sys.path, plus any modules that are frozen or built-in. 201 202 Note that ImpImporter does not currently support being used by placement 203 on sys.meta_path. 204 """ 205 206 def __init__(self, path=None): 207 global imp 208 warnings.warn("This emulation is deprecated, use 'importlib' instead", 209 DeprecationWarning) 210 _import_imp() 211 self.path = path 212 213 def find_module(self, fullname, path=None): 214 # Note: we ignore 'path' argument since it is only used via meta_path 215 subname = fullname.split(".")[-1] 216 if subname != fullname and self.path is None: 217 return None 218 if self.path is None: 219 path = None 220 else: 221 path = [os.path.realpath(self.path)] 222 try: 223 file, filename, etc = imp.find_module(subname, path) 224 except ImportError: 225 return None 226 return ImpLoader(fullname, file, filename, etc) 227 228 def iter_modules(self, prefix=''): 229 if self.path is None or not os.path.isdir(self.path): 230 return 231 232 yielded = {} 233 import inspect 234 try: 235 filenames = os.listdir(self.path) 236 except OSError: 237 # ignore unreadable directories like import does 238 filenames = [] 239 filenames.sort() # handle packages before same-named modules 240 241 for fn in filenames: 242 modname = inspect.getmodulename(fn) 243 if modname=='__init__' or modname in yielded: 244 continue 245 246 path = os.path.join(self.path, fn) 247 ispkg = False 248 249 if not modname and os.path.isdir(path) and '.' not in fn: 250 modname = fn 251 try: 252 dircontents = os.listdir(path) 253 except OSError: 254 # ignore unreadable directories like import does 255 dircontents = [] 256 for fn in dircontents: 257 subname = inspect.getmodulename(fn) 258 if subname=='__init__': 259 ispkg = True 260 break 261 else: 262 continue # not a package 263 264 if modname and '.' not in modname: 265 yielded[modname] = 1 266 yield prefix + modname, ispkg 267 268 269class ImpLoader: 270 """PEP 302 Loader that wraps Python's "classic" import algorithm 271 """ 272 code = source = None 273 274 def __init__(self, fullname, file, filename, etc): 275 warnings.warn("This emulation is deprecated, use 'importlib' instead", 276 DeprecationWarning) 277 _import_imp() 278 self.file = file 279 self.filename = filename 280 self.fullname = fullname 281 self.etc = etc 282 283 def load_module(self, fullname): 284 self._reopen() 285 try: 286 mod = imp.load_module(fullname, self.file, self.filename, self.etc) 287 finally: 288 if self.file: 289 self.file.close() 290 # Note: we don't set __loader__ because we want the module to look 291 # normal; i.e. this is just a wrapper for standard import machinery 292 return mod 293 294 def get_data(self, pathname): 295 with open(pathname, "rb") as file: 296 return file.read() 297 298 def _reopen(self): 299 if self.file and self.file.closed: 300 mod_type = self.etc[2] 301 if mod_type==imp.PY_SOURCE: 302 self.file = open(self.filename, 'r') 303 elif mod_type in (imp.PY_COMPILED, imp.C_EXTENSION): 304 self.file = open(self.filename, 'rb') 305 306 def _fix_name(self, fullname): 307 if fullname is None: 308 fullname = self.fullname 309 elif fullname != self.fullname: 310 raise ImportError("Loader for module %s cannot handle " 311 "module %s" % (self.fullname, fullname)) 312 return fullname 313 314 def is_package(self, fullname): 315 fullname = self._fix_name(fullname) 316 return self.etc[2]==imp.PKG_DIRECTORY 317 318 def get_code(self, fullname=None): 319 fullname = self._fix_name(fullname) 320 if self.code is None: 321 mod_type = self.etc[2] 322 if mod_type==imp.PY_SOURCE: 323 source = self.get_source(fullname) 324 self.code = compile(source, self.filename, 'exec') 325 elif mod_type==imp.PY_COMPILED: 326 self._reopen() 327 try: 328 self.code = read_code(self.file) 329 finally: 330 self.file.close() 331 elif mod_type==imp.PKG_DIRECTORY: 332 self.code = self._get_delegate().get_code() 333 return self.code 334 335 def get_source(self, fullname=None): 336 fullname = self._fix_name(fullname) 337 if self.source is None: 338 mod_type = self.etc[2] 339 if mod_type==imp.PY_SOURCE: 340 self._reopen() 341 try: 342 self.source = self.file.read() 343 finally: 344 self.file.close() 345 elif mod_type==imp.PY_COMPILED: 346 if os.path.exists(self.filename[:-1]): 347 with open(self.filename[:-1], 'r') as f: 348 self.source = f.read() 349 elif mod_type==imp.PKG_DIRECTORY: 350 self.source = self._get_delegate().get_source() 351 return self.source 352 353 def _get_delegate(self): 354 finder = ImpImporter(self.filename) 355 spec = _get_spec(finder, '__init__') 356 return spec.loader 357 358 def get_filename(self, fullname=None): 359 fullname = self._fix_name(fullname) 360 mod_type = self.etc[2] 361 if mod_type==imp.PKG_DIRECTORY: 362 return self._get_delegate().get_filename() 363 elif mod_type in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION): 364 return self.filename 365 return None 366 367 368try: 369 import zipimport 370 from zipimport import zipimporter 371 372 def iter_zipimport_modules(importer, prefix=''): 373 dirlist = sorted(zipimport._zip_directory_cache[importer.archive]) 374 _prefix = importer.prefix 375 plen = len(_prefix) 376 yielded = {} 377 import inspect 378 for fn in dirlist: 379 if not fn.startswith(_prefix): 380 continue 381 382 fn = fn[plen:].split(os.sep) 383 384 if len(fn)==2 and fn[1].startswith('__init__.py'): 385 if fn[0] not in yielded: 386 yielded[fn[0]] = 1 387 yield prefix + fn[0], True 388 389 if len(fn)!=1: 390 continue 391 392 modname = inspect.getmodulename(fn[0]) 393 if modname=='__init__': 394 continue 395 396 if modname and '.' not in modname and modname not in yielded: 397 yielded[modname] = 1 398 yield prefix + modname, False 399 400 iter_importer_modules.register(zipimporter, iter_zipimport_modules) 401 402except ImportError: 403 pass 404 405 406def get_importer(path_item): 407 """Retrieve a finder for the given path item 408 409 The returned finder is cached in sys.path_importer_cache 410 if it was newly created by a path hook. 411 412 The cache (or part of it) can be cleared manually if a 413 rescan of sys.path_hooks is necessary. 414 """ 415 try: 416 importer = sys.path_importer_cache[path_item] 417 except KeyError: 418 for path_hook in sys.path_hooks: 419 try: 420 importer = path_hook(path_item) 421 sys.path_importer_cache.setdefault(path_item, importer) 422 break 423 except ImportError: 424 pass 425 else: 426 importer = None 427 return importer 428 429 430def iter_importers(fullname=""): 431 """Yield finders for the given module name 432 433 If fullname contains a '.', the finders will be for the package 434 containing fullname, otherwise they will be all registered top level 435 finders (i.e. those on both sys.meta_path and sys.path_hooks). 436 437 If the named module is in a package, that package is imported as a side 438 effect of invoking this function. 439 440 If no module name is specified, all top level finders are produced. 441 """ 442 if fullname.startswith('.'): 443 msg = "Relative module name {!r} not supported".format(fullname) 444 raise ImportError(msg) 445 if '.' in fullname: 446 # Get the containing package's __path__ 447 pkg_name = fullname.rpartition(".")[0] 448 pkg = importlib.import_module(pkg_name) 449 path = getattr(pkg, '__path__', None) 450 if path is None: 451 return 452 else: 453 yield from sys.meta_path 454 path = sys.path 455 for item in path: 456 yield get_importer(item) 457 458 459def get_loader(module_or_name): 460 """Get a "loader" object for module_or_name 461 462 Returns None if the module cannot be found or imported. 463 If the named module is not already imported, its containing package 464 (if any) is imported, in order to establish the package __path__. 465 """ 466 if module_or_name in sys.modules: 467 module_or_name = sys.modules[module_or_name] 468 if module_or_name is None: 469 return None 470 if isinstance(module_or_name, ModuleType): 471 module = module_or_name 472 loader = getattr(module, '__loader__', None) 473 if loader is not None: 474 return loader 475 if getattr(module, '__spec__', None) is None: 476 return None 477 fullname = module.__name__ 478 else: 479 fullname = module_or_name 480 return find_loader(fullname) 481 482 483def find_loader(fullname): 484 """Find a "loader" object for fullname 485 486 This is a backwards compatibility wrapper around 487 importlib.util.find_spec that converts most failures to ImportError 488 and only returns the loader rather than the full spec 489 """ 490 if fullname.startswith('.'): 491 msg = "Relative module name {!r} not supported".format(fullname) 492 raise ImportError(msg) 493 try: 494 spec = importlib.util.find_spec(fullname) 495 except (ImportError, AttributeError, TypeError, ValueError) as ex: 496 # This hack fixes an impedance mismatch between pkgutil and 497 # importlib, where the latter raises other errors for cases where 498 # pkgutil previously raised ImportError 499 msg = "Error while finding loader for {!r} ({}: {})" 500 raise ImportError(msg.format(fullname, type(ex), ex)) from ex 501 return spec.loader if spec is not None else None 502 503 504def extend_path(path, name): 505 """Extend a package's path. 506 507 Intended use is to place the following code in a package's __init__.py: 508 509 from pkgutil import extend_path 510 __path__ = extend_path(__path__, __name__) 511 512 This will add to the package's __path__ all subdirectories of 513 directories on sys.path named after the package. This is useful 514 if one wants to distribute different parts of a single logical 515 package as multiple directories. 516 517 It also looks for *.pkg files beginning where * matches the name 518 argument. This feature is similar to *.pth files (see site.py), 519 except that it doesn't special-case lines starting with 'import'. 520 A *.pkg file is trusted at face value: apart from checking for 521 duplicates, all entries found in a *.pkg file are added to the 522 path, regardless of whether they are exist the filesystem. (This 523 is a feature.) 524 525 If the input path is not a list (as is the case for frozen 526 packages) it is returned unchanged. The input path is not 527 modified; an extended copy is returned. Items are only appended 528 to the copy at the end. 529 530 It is assumed that sys.path is a sequence. Items of sys.path that 531 are not (unicode or 8-bit) strings referring to existing 532 directories are ignored. Unicode items of sys.path that cause 533 errors when used as filenames may cause this function to raise an 534 exception (in line with os.path.isdir() behavior). 535 """ 536 537 if not isinstance(path, list): 538 # This could happen e.g. when this is called from inside a 539 # frozen package. Return the path unchanged in that case. 540 return path 541 542 sname_pkg = name + ".pkg" 543 544 path = path[:] # Start with a copy of the existing path 545 546 parent_package, _, final_name = name.rpartition('.') 547 if parent_package: 548 try: 549 search_path = sys.modules[parent_package].__path__ 550 except (KeyError, AttributeError): 551 # We can't do anything: find_loader() returns None when 552 # passed a dotted name. 553 return path 554 else: 555 search_path = sys.path 556 557 for dir in search_path: 558 if not isinstance(dir, str): 559 continue 560 561 finder = get_importer(dir) 562 if finder is not None: 563 portions = [] 564 if hasattr(finder, 'find_spec'): 565 spec = finder.find_spec(final_name) 566 if spec is not None: 567 portions = spec.submodule_search_locations or [] 568 # Is this finder PEP 420 compliant? 569 elif hasattr(finder, 'find_loader'): 570 _, portions = finder.find_loader(final_name) 571 572 for portion in portions: 573 # XXX This may still add duplicate entries to path on 574 # case-insensitive filesystems 575 if portion not in path: 576 path.append(portion) 577 578 # XXX Is this the right thing for subpackages like zope.app? 579 # It looks for a file named "zope.app.pkg" 580 pkgfile = os.path.join(dir, sname_pkg) 581 if os.path.isfile(pkgfile): 582 try: 583 f = open(pkgfile) 584 except OSError as msg: 585 sys.stderr.write("Can't open %s: %s\n" % 586 (pkgfile, msg)) 587 else: 588 with f: 589 for line in f: 590 line = line.rstrip('\n') 591 if not line or line.startswith('#'): 592 continue 593 path.append(line) # Don't check for existence! 594 595 return path 596 597 598def get_data(package, resource): 599 """Get a resource from a package. 600 601 This is a wrapper round the PEP 302 loader get_data API. The package 602 argument should be the name of a package, in standard module format 603 (foo.bar). The resource argument should be in the form of a relative 604 filename, using '/' as the path separator. The parent directory name '..' 605 is not allowed, and nor is a rooted name (starting with a '/'). 606 607 The function returns a binary string, which is the contents of the 608 specified resource. 609 610 For packages located in the filesystem, which have already been imported, 611 this is the rough equivalent of 612 613 d = os.path.dirname(sys.modules[package].__file__) 614 data = open(os.path.join(d, resource), 'rb').read() 615 616 If the package cannot be located or loaded, or it uses a PEP 302 loader 617 which does not support get_data(), then None is returned. 618 """ 619 620 spec = importlib.util.find_spec(package) 621 if spec is None: 622 return None 623 loader = spec.loader 624 if loader is None or not hasattr(loader, 'get_data'): 625 return None 626 # XXX needs test 627 mod = (sys.modules.get(package) or 628 importlib._bootstrap._load(spec)) 629 if mod is None or not hasattr(mod, '__file__'): 630 return None 631 632 # Modify the resource name to be compatible with the loader.get_data 633 # signature - an os.path format "filename" starting with the dirname of 634 # the package's __file__ 635 parts = resource.split('/') 636 parts.insert(0, os.path.dirname(mod.__file__)) 637 resource_name = os.path.join(*parts) 638 return loader.get_data(resource_name) 639 640 641_DOTTED_WORDS = r'(?!\d)(\w+)(\.(?!\d)(\w+))*' 642_NAME_PATTERN = re.compile(f'^(?P<pkg>{_DOTTED_WORDS})(?P<cln>:(?P<obj>{_DOTTED_WORDS})?)?$', re.U) 643del _DOTTED_WORDS 644 645def resolve_name(name): 646 """ 647 Resolve a name to an object. 648 649 It is expected that `name` will be a string in one of the following 650 formats, where W is shorthand for a valid Python identifier and dot stands 651 for a literal period in these pseudo-regexes: 652 653 W(.W)* 654 W(.W)*:(W(.W)*)? 655 656 The first form is intended for backward compatibility only. It assumes that 657 some part of the dotted name is a package, and the rest is an object 658 somewhere within that package, possibly nested inside other objects. 659 Because the place where the package stops and the object hierarchy starts 660 can't be inferred by inspection, repeated attempts to import must be done 661 with this form. 662 663 In the second form, the caller makes the division point clear through the 664 provision of a single colon: the dotted name to the left of the colon is a 665 package to be imported, and the dotted name to the right is the object 666 hierarchy within that package. Only one import is needed in this form. If 667 it ends with the colon, then a module object is returned. 668 669 The function will return an object (which might be a module), or raise one 670 of the following exceptions: 671 672 ValueError - if `name` isn't in a recognised format 673 ImportError - if an import failed when it shouldn't have 674 AttributeError - if a failure occurred when traversing the object hierarchy 675 within the imported package to get to the desired object) 676 """ 677 m = _NAME_PATTERN.match(name) 678 if not m: 679 raise ValueError(f'invalid format: {name!r}') 680 gd = m.groupdict() 681 if gd.get('cln'): 682 # there is a colon - a one-step import is all that's needed 683 mod = importlib.import_module(gd['pkg']) 684 parts = gd.get('obj') 685 parts = parts.split('.') if parts else [] 686 else: 687 # no colon - have to iterate to find the package boundary 688 parts = name.split('.') 689 modname = parts.pop(0) 690 # first part *must* be a module/package. 691 mod = importlib.import_module(modname) 692 while parts: 693 p = parts[0] 694 s = f'{modname}.{p}' 695 try: 696 mod = importlib.import_module(s) 697 parts.pop(0) 698 modname = s 699 except ImportError: 700 break 701 # if we reach this point, mod is the module, already imported, and 702 # parts is the list of parts in the object hierarchy to be traversed, or 703 # an empty list if just the module is wanted. 704 result = mod 705 for p in parts: 706 result = getattr(result, p) 707 return result 708