1"""Utilities to support packages.""" 2 3from collections import namedtuple 4from functools import singledispatch as simplegeneric 5import importlib 6import importlib.util 7import importlib.machinery 8import os 9import os.path 10import sys 11from types import ModuleType 12import warnings 13 14__all__ = [ 15 'get_importer', 'iter_importers', 'get_loader', 'find_loader', 16 'walk_packages', 'iter_modules', 'get_data', 17 'read_code', 'extend_path', 18 'ModuleInfo', 19] 20 21 22ModuleInfo = namedtuple('ModuleInfo', 'module_finder name ispkg') 23ModuleInfo.__doc__ = 'A namedtuple with minimal info about a module.' 24 25 26def read_code(stream): 27 # This helper is needed in order for the PEP 302 emulation to 28 # correctly handle compiled files 29 import marshal 30 31 magic = stream.read(4) 32 if magic != importlib.util.MAGIC_NUMBER: 33 return None 34 35 stream.read(12) # Skip rest of the header 36 return marshal.load(stream) 37 38 39def walk_packages(path=None, prefix='', onerror=None): 40 """Yields ModuleInfo for all modules recursively 41 on path, or, if path is None, all accessible modules. 42 43 'path' should be either None or a list of paths to look for 44 modules in. 45 46 'prefix' is a string to output on the front of every module name 47 on output. 48 49 Note that this function must import all *packages* (NOT all 50 modules!) on the given path, in order to access the __path__ 51 attribute to find submodules. 52 53 'onerror' is a function which gets called with one argument (the 54 name of the package which was being imported) if any exception 55 occurs while trying to import a package. If no onerror function is 56 supplied, ImportErrors are caught and ignored, while all other 57 exceptions are propagated, terminating the search. 58 59 Examples: 60 61 # list all modules python can access 62 walk_packages() 63 64 # list all submodules of ctypes 65 walk_packages(ctypes.__path__, ctypes.__name__+'.') 66 """ 67 68 def seen(p, m={}): 69 if p in m: 70 return True 71 m[p] = True 72 73 for info in iter_modules(path, prefix): 74 yield info 75 76 if info.ispkg: 77 try: 78 __import__(info.name) 79 except ImportError: 80 if onerror is not None: 81 onerror(info.name) 82 except Exception: 83 if onerror is not None: 84 onerror(info.name) 85 else: 86 raise 87 else: 88 path = getattr(sys.modules[info.name], '__path__', None) or [] 89 90 # don't traverse path items we've seen before 91 path = [p for p in path if not seen(p)] 92 93 yield from walk_packages(path, info.name+'.', onerror) 94 95 96def iter_modules(path=None, prefix=''): 97 """Yields ModuleInfo for all submodules on path, 98 or, if path is None, all top-level modules on sys.path. 99 100 'path' should be either None or a list of paths to look for 101 modules in. 102 103 'prefix' is a string to output on the front of every module name 104 on output. 105 """ 106 if path is None: 107 importers = iter_importers() 108 elif isinstance(path, str): 109 raise ValueError("path must be None or list of paths to look for " 110 "modules in") 111 else: 112 importers = map(get_importer, path) 113 114 yielded = {} 115 for i in importers: 116 for name, ispkg in iter_importer_modules(i, prefix): 117 if name not in yielded: 118 yielded[name] = 1 119 yield ModuleInfo(i, name, ispkg) 120 121 122@simplegeneric 123def iter_importer_modules(importer, prefix=''): 124 if not hasattr(importer, 'iter_modules'): 125 return [] 126 return importer.iter_modules(prefix) 127 128 129# Implement a file walker for the normal importlib path hook 130def _iter_file_finder_modules(importer, prefix=''): 131 if importer.path is None or not os.path.isdir(importer.path): 132 return 133 134 yielded = {} 135 import inspect 136 try: 137 filenames = os.listdir(importer.path) 138 except OSError: 139 # ignore unreadable directories like import does 140 filenames = [] 141 filenames.sort() # handle packages before same-named modules 142 143 for fn in filenames: 144 modname = inspect.getmodulename(fn) 145 if modname=='__init__' or modname in yielded: 146 continue 147 148 path = os.path.join(importer.path, fn) 149 ispkg = False 150 151 if not modname and os.path.isdir(path) and '.' not in fn: 152 modname = fn 153 try: 154 dircontents = os.listdir(path) 155 except OSError: 156 # ignore unreadable directories like import does 157 dircontents = [] 158 for fn in dircontents: 159 subname = inspect.getmodulename(fn) 160 if subname=='__init__': 161 ispkg = True 162 break 163 else: 164 continue # not a package 165 166 if modname and '.' not in modname: 167 yielded[modname] = 1 168 yield prefix + modname, ispkg 169 170iter_importer_modules.register( 171 importlib.machinery.FileFinder, _iter_file_finder_modules) 172 173 174try: 175 import zipimport 176 from zipimport import zipimporter 177 178 def iter_zipimport_modules(importer, prefix=''): 179 dirlist = sorted(zipimport._zip_directory_cache[importer.archive]) 180 _prefix = importer.prefix 181 plen = len(_prefix) 182 yielded = {} 183 import inspect 184 for fn in dirlist: 185 if not fn.startswith(_prefix): 186 continue 187 188 fn = fn[plen:].split(os.sep) 189 190 if len(fn)==2 and fn[1].startswith('__init__.py'): 191 if fn[0] not in yielded: 192 yielded[fn[0]] = 1 193 yield prefix + fn[0], True 194 195 if len(fn)!=1: 196 continue 197 198 modname = inspect.getmodulename(fn[0]) 199 if modname=='__init__': 200 continue 201 202 if modname and '.' not in modname and modname not in yielded: 203 yielded[modname] = 1 204 yield prefix + modname, False 205 206 iter_importer_modules.register(zipimporter, iter_zipimport_modules) 207 208except ImportError: 209 pass 210 211 212def get_importer(path_item): 213 """Retrieve a finder for the given path item 214 215 The returned finder is cached in sys.path_importer_cache 216 if it was newly created by a path hook. 217 218 The cache (or part of it) can be cleared manually if a 219 rescan of sys.path_hooks is necessary. 220 """ 221 path_item = os.fsdecode(path_item) 222 try: 223 importer = sys.path_importer_cache[path_item] 224 except KeyError: 225 for path_hook in sys.path_hooks: 226 try: 227 importer = path_hook(path_item) 228 sys.path_importer_cache.setdefault(path_item, importer) 229 break 230 except ImportError: 231 pass 232 else: 233 importer = None 234 return importer 235 236 237def iter_importers(fullname=""): 238 """Yield finders for the given module name 239 240 If fullname contains a '.', the finders will be for the package 241 containing fullname, otherwise they will be all registered top level 242 finders (i.e. those on both sys.meta_path and sys.path_hooks). 243 244 If the named module is in a package, that package is imported as a side 245 effect of invoking this function. 246 247 If no module name is specified, all top level finders are produced. 248 """ 249 if fullname.startswith('.'): 250 msg = "Relative module name {!r} not supported".format(fullname) 251 raise ImportError(msg) 252 if '.' in fullname: 253 # Get the containing package's __path__ 254 pkg_name = fullname.rpartition(".")[0] 255 pkg = importlib.import_module(pkg_name) 256 path = getattr(pkg, '__path__', None) 257 if path is None: 258 return 259 else: 260 yield from sys.meta_path 261 path = sys.path 262 for item in path: 263 yield get_importer(item) 264 265 266def get_loader(module_or_name): 267 """Get a "loader" object for module_or_name 268 269 Returns None if the module cannot be found or imported. 270 If the named module is not already imported, its containing package 271 (if any) is imported, in order to establish the package __path__. 272 """ 273 warnings._deprecated("pkgutil.get_loader", 274 f"{warnings._DEPRECATED_MSG}; " 275 "use importlib.util.find_spec() instead", 276 remove=(3, 14)) 277 if module_or_name in sys.modules: 278 module_or_name = sys.modules[module_or_name] 279 if module_or_name is None: 280 return None 281 if isinstance(module_or_name, ModuleType): 282 module = module_or_name 283 loader = getattr(module, '__loader__', None) 284 if loader is not None: 285 return loader 286 if getattr(module, '__spec__', None) is None: 287 return None 288 fullname = module.__name__ 289 else: 290 fullname = module_or_name 291 return find_loader(fullname) 292 293 294def find_loader(fullname): 295 """Find a "loader" object for fullname 296 297 This is a backwards compatibility wrapper around 298 importlib.util.find_spec that converts most failures to ImportError 299 and only returns the loader rather than the full spec 300 """ 301 warnings._deprecated("pkgutil.find_loader", 302 f"{warnings._DEPRECATED_MSG}; " 303 "use importlib.util.find_spec() instead", 304 remove=(3, 14)) 305 if fullname.startswith('.'): 306 msg = "Relative module name {!r} not supported".format(fullname) 307 raise ImportError(msg) 308 try: 309 spec = importlib.util.find_spec(fullname) 310 except (ImportError, AttributeError, TypeError, ValueError) as ex: 311 # This hack fixes an impedance mismatch between pkgutil and 312 # importlib, where the latter raises other errors for cases where 313 # pkgutil previously raised ImportError 314 msg = "Error while finding loader for {!r} ({}: {})" 315 raise ImportError(msg.format(fullname, type(ex), ex)) from ex 316 return spec.loader if spec is not None else None 317 318 319def extend_path(path, name): 320 """Extend a package's path. 321 322 Intended use is to place the following code in a package's __init__.py: 323 324 from pkgutil import extend_path 325 __path__ = extend_path(__path__, __name__) 326 327 For each directory on sys.path that has a subdirectory that 328 matches the package name, add the subdirectory to the package's 329 __path__. This is useful if one wants to distribute different 330 parts of a single logical package as multiple directories. 331 332 It also looks for *.pkg files beginning where * matches the name 333 argument. This feature is similar to *.pth files (see site.py), 334 except that it doesn't special-case lines starting with 'import'. 335 A *.pkg file is trusted at face value: apart from checking for 336 duplicates, all entries found in a *.pkg file are added to the 337 path, regardless of whether they are exist the filesystem. (This 338 is a feature.) 339 340 If the input path is not a list (as is the case for frozen 341 packages) it is returned unchanged. The input path is not 342 modified; an extended copy is returned. Items are only appended 343 to the copy at the end. 344 345 It is assumed that sys.path is a sequence. Items of sys.path that 346 are not (unicode or 8-bit) strings referring to existing 347 directories are ignored. Unicode items of sys.path that cause 348 errors when used as filenames may cause this function to raise an 349 exception (in line with os.path.isdir() behavior). 350 """ 351 352 if not isinstance(path, list): 353 # This could happen e.g. when this is called from inside a 354 # frozen package. Return the path unchanged in that case. 355 return path 356 357 sname_pkg = name + ".pkg" 358 359 path = path[:] # Start with a copy of the existing path 360 361 parent_package, _, final_name = name.rpartition('.') 362 if parent_package: 363 try: 364 search_path = sys.modules[parent_package].__path__ 365 except (KeyError, AttributeError): 366 # We can't do anything: find_loader() returns None when 367 # passed a dotted name. 368 return path 369 else: 370 search_path = sys.path 371 372 for dir in search_path: 373 if not isinstance(dir, str): 374 continue 375 376 finder = get_importer(dir) 377 if finder is not None: 378 portions = [] 379 if hasattr(finder, 'find_spec'): 380 spec = finder.find_spec(final_name) 381 if spec is not None: 382 portions = spec.submodule_search_locations or [] 383 # Is this finder PEP 420 compliant? 384 elif hasattr(finder, 'find_loader'): 385 _, portions = finder.find_loader(final_name) 386 387 for portion in portions: 388 # XXX This may still add duplicate entries to path on 389 # case-insensitive filesystems 390 if portion not in path: 391 path.append(portion) 392 393 # XXX Is this the right thing for subpackages like zope.app? 394 # It looks for a file named "zope.app.pkg" 395 pkgfile = os.path.join(dir, sname_pkg) 396 if os.path.isfile(pkgfile): 397 try: 398 f = open(pkgfile) 399 except OSError as msg: 400 sys.stderr.write("Can't open %s: %s\n" % 401 (pkgfile, msg)) 402 else: 403 with f: 404 for line in f: 405 line = line.rstrip('\n') 406 if not line or line.startswith('#'): 407 continue 408 path.append(line) # Don't check for existence! 409 410 return path 411 412 413def get_data(package, resource): 414 """Get a resource from a package. 415 416 This is a wrapper round the PEP 302 loader get_data API. The package 417 argument should be the name of a package, in standard module format 418 (foo.bar). The resource argument should be in the form of a relative 419 filename, using '/' as the path separator. The parent directory name '..' 420 is not allowed, and nor is a rooted name (starting with a '/'). 421 422 The function returns a binary string, which is the contents of the 423 specified resource. 424 425 For packages located in the filesystem, which have already been imported, 426 this is the rough equivalent of 427 428 d = os.path.dirname(sys.modules[package].__file__) 429 data = open(os.path.join(d, resource), 'rb').read() 430 431 If the package cannot be located or loaded, or it uses a PEP 302 loader 432 which does not support get_data(), then None is returned. 433 """ 434 435 spec = importlib.util.find_spec(package) 436 if spec is None: 437 return None 438 loader = spec.loader 439 if loader is None or not hasattr(loader, 'get_data'): 440 return None 441 # XXX needs test 442 mod = (sys.modules.get(package) or 443 importlib._bootstrap._load(spec)) 444 if mod is None or not hasattr(mod, '__file__'): 445 return None 446 447 # Modify the resource name to be compatible with the loader.get_data 448 # signature - an os.path format "filename" starting with the dirname of 449 # the package's __file__ 450 parts = resource.split('/') 451 parts.insert(0, os.path.dirname(mod.__file__)) 452 resource_name = os.path.join(*parts) 453 return loader.get_data(resource_name) 454 455 456_NAME_PATTERN = None 457 458def resolve_name(name): 459 """ 460 Resolve a name to an object. 461 462 It is expected that `name` will be a string in one of the following 463 formats, where W is shorthand for a valid Python identifier and dot stands 464 for a literal period in these pseudo-regexes: 465 466 W(.W)* 467 W(.W)*:(W(.W)*)? 468 469 The first form is intended for backward compatibility only. It assumes that 470 some part of the dotted name is a package, and the rest is an object 471 somewhere within that package, possibly nested inside other objects. 472 Because the place where the package stops and the object hierarchy starts 473 can't be inferred by inspection, repeated attempts to import must be done 474 with this form. 475 476 In the second form, the caller makes the division point clear through the 477 provision of a single colon: the dotted name to the left of the colon is a 478 package to be imported, and the dotted name to the right is the object 479 hierarchy within that package. Only one import is needed in this form. If 480 it ends with the colon, then a module object is returned. 481 482 The function will return an object (which might be a module), or raise one 483 of the following exceptions: 484 485 ValueError - if `name` isn't in a recognised format 486 ImportError - if an import failed when it shouldn't have 487 AttributeError - if a failure occurred when traversing the object hierarchy 488 within the imported package to get to the desired object. 489 """ 490 global _NAME_PATTERN 491 if _NAME_PATTERN is None: 492 # Lazy import to speedup Python startup time 493 import re 494 dotted_words = r'(?!\d)(\w+)(\.(?!\d)(\w+))*' 495 _NAME_PATTERN = re.compile(f'^(?P<pkg>{dotted_words})' 496 f'(?P<cln>:(?P<obj>{dotted_words})?)?$', 497 re.UNICODE) 498 499 m = _NAME_PATTERN.match(name) 500 if not m: 501 raise ValueError(f'invalid format: {name!r}') 502 gd = m.groupdict() 503 if gd.get('cln'): 504 # there is a colon - a one-step import is all that's needed 505 mod = importlib.import_module(gd['pkg']) 506 parts = gd.get('obj') 507 parts = parts.split('.') if parts else [] 508 else: 509 # no colon - have to iterate to find the package boundary 510 parts = name.split('.') 511 modname = parts.pop(0) 512 # first part *must* be a module/package. 513 mod = importlib.import_module(modname) 514 while parts: 515 p = parts[0] 516 s = f'{modname}.{p}' 517 try: 518 mod = importlib.import_module(s) 519 parts.pop(0) 520 modname = s 521 except ImportError: 522 break 523 # if we reach this point, mod is the module, already imported, and 524 # parts is the list of parts in the object hierarchy to be traversed, or 525 # an empty list if just the module is wanted. 526 result = mod 527 for p in parts: 528 result = getattr(result, p) 529 return result 530