1"""Find modules used by a script, using introspection.""" 2 3import dis 4import importlib._bootstrap_external 5import importlib.machinery 6import marshal 7import os 8import io 9import sys 10 11# Old imp constants: 12 13_SEARCH_ERROR = 0 14_PY_SOURCE = 1 15_PY_COMPILED = 2 16_C_EXTENSION = 3 17_PKG_DIRECTORY = 5 18_C_BUILTIN = 6 19_PY_FROZEN = 7 20 21# Modulefinder does a good job at simulating Python's, but it can not 22# handle __path__ modifications packages make at runtime. Therefore there 23# is a mechanism whereby you can register extra paths in this map for a 24# package, and it will be honored. 25 26# Note this is a mapping is lists of paths. 27packagePathMap = {} 28 29# A Public interface 30def AddPackagePath(packagename, path): 31 packagePathMap.setdefault(packagename, []).append(path) 32 33replacePackageMap = {} 34 35# This ReplacePackage mechanism allows modulefinder to work around 36# situations in which a package injects itself under the name 37# of another package into sys.modules at runtime by calling 38# ReplacePackage("real_package_name", "faked_package_name") 39# before running ModuleFinder. 40 41def ReplacePackage(oldname, newname): 42 replacePackageMap[oldname] = newname 43 44 45def _find_module(name, path=None): 46 """An importlib reimplementation of imp.find_module (for our purposes).""" 47 48 # It's necessary to clear the caches for our Finder first, in case any 49 # modules are being added/deleted/modified at runtime. In particular, 50 # test_modulefinder.py changes file tree contents in a cache-breaking way: 51 52 importlib.machinery.PathFinder.invalidate_caches() 53 54 spec = importlib.machinery.PathFinder.find_spec(name, path) 55 56 if spec is None: 57 raise ImportError("No module named {name!r}".format(name=name), name=name) 58 59 # Some special cases: 60 61 if spec.loader is importlib.machinery.BuiltinImporter: 62 return None, None, ("", "", _C_BUILTIN) 63 64 if spec.loader is importlib.machinery.FrozenImporter: 65 return None, None, ("", "", _PY_FROZEN) 66 67 file_path = spec.origin 68 69 if spec.loader.is_package(name): 70 return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY) 71 72 if isinstance(spec.loader, importlib.machinery.SourceFileLoader): 73 kind = _PY_SOURCE 74 75 elif isinstance( 76 spec.loader, ( 77 importlib.machinery.ExtensionFileLoader, 78 importlib.machinery.AppleFrameworkLoader, 79 ) 80 ): 81 kind = _C_EXTENSION 82 83 elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader): 84 kind = _PY_COMPILED 85 86 else: # Should never happen. 87 return None, None, ("", "", _SEARCH_ERROR) 88 89 file = io.open_code(file_path) 90 suffix = os.path.splitext(file_path)[-1] 91 92 return file, file_path, (suffix, "rb", kind) 93 94 95class Module: 96 97 def __init__(self, name, file=None, path=None): 98 self.__name__ = name 99 self.__file__ = file 100 self.__path__ = path 101 self.__code__ = None 102 # The set of global names that are assigned to in the module. 103 # This includes those names imported through starimports of 104 # Python modules. 105 self.globalnames = {} 106 # The set of starimports this module did that could not be 107 # resolved, ie. a starimport from a non-Python module. 108 self.starimports = {} 109 110 def __repr__(self): 111 s = "Module(%r" % (self.__name__,) 112 if self.__file__ is not None: 113 s = s + ", %r" % (self.__file__,) 114 if self.__path__ is not None: 115 s = s + ", %r" % (self.__path__,) 116 s = s + ")" 117 return s 118 119class ModuleFinder: 120 121 def __init__(self, path=None, debug=0, excludes=None, replace_paths=None): 122 if path is None: 123 path = sys.path 124 self.path = path 125 self.modules = {} 126 self.badmodules = {} 127 self.debug = debug 128 self.indent = 0 129 self.excludes = excludes if excludes is not None else [] 130 self.replace_paths = replace_paths if replace_paths is not None else [] 131 self.processed_paths = [] # Used in debugging only 132 133 def msg(self, level, str, *args): 134 if level <= self.debug: 135 for i in range(self.indent): 136 print(" ", end=' ') 137 print(str, end=' ') 138 for arg in args: 139 print(repr(arg), end=' ') 140 print() 141 142 def msgin(self, *args): 143 level = args[0] 144 if level <= self.debug: 145 self.indent = self.indent + 1 146 self.msg(*args) 147 148 def msgout(self, *args): 149 level = args[0] 150 if level <= self.debug: 151 self.indent = self.indent - 1 152 self.msg(*args) 153 154 def run_script(self, pathname): 155 self.msg(2, "run_script", pathname) 156 with io.open_code(pathname) as fp: 157 stuff = ("", "rb", _PY_SOURCE) 158 self.load_module('__main__', fp, pathname, stuff) 159 160 def load_file(self, pathname): 161 dir, name = os.path.split(pathname) 162 name, ext = os.path.splitext(name) 163 with io.open_code(pathname) as fp: 164 stuff = (ext, "rb", _PY_SOURCE) 165 self.load_module(name, fp, pathname, stuff) 166 167 def import_hook(self, name, caller=None, fromlist=None, level=-1): 168 self.msg(3, "import_hook", name, caller, fromlist, level) 169 parent = self.determine_parent(caller, level=level) 170 q, tail = self.find_head_package(parent, name) 171 m = self.load_tail(q, tail) 172 if not fromlist: 173 return q 174 if m.__path__: 175 self.ensure_fromlist(m, fromlist) 176 return None 177 178 def determine_parent(self, caller, level=-1): 179 self.msgin(4, "determine_parent", caller, level) 180 if not caller or level == 0: 181 self.msgout(4, "determine_parent -> None") 182 return None 183 pname = caller.__name__ 184 if level >= 1: # relative import 185 if caller.__path__: 186 level -= 1 187 if level == 0: 188 parent = self.modules[pname] 189 assert parent is caller 190 self.msgout(4, "determine_parent ->", parent) 191 return parent 192 if pname.count(".") < level: 193 raise ImportError("relative importpath too deep") 194 pname = ".".join(pname.split(".")[:-level]) 195 parent = self.modules[pname] 196 self.msgout(4, "determine_parent ->", parent) 197 return parent 198 if caller.__path__: 199 parent = self.modules[pname] 200 assert caller is parent 201 self.msgout(4, "determine_parent ->", parent) 202 return parent 203 if '.' in pname: 204 i = pname.rfind('.') 205 pname = pname[:i] 206 parent = self.modules[pname] 207 assert parent.__name__ == pname 208 self.msgout(4, "determine_parent ->", parent) 209 return parent 210 self.msgout(4, "determine_parent -> None") 211 return None 212 213 def find_head_package(self, parent, name): 214 self.msgin(4, "find_head_package", parent, name) 215 if '.' in name: 216 i = name.find('.') 217 head = name[:i] 218 tail = name[i+1:] 219 else: 220 head = name 221 tail = "" 222 if parent: 223 qname = "%s.%s" % (parent.__name__, head) 224 else: 225 qname = head 226 q = self.import_module(head, qname, parent) 227 if q: 228 self.msgout(4, "find_head_package ->", (q, tail)) 229 return q, tail 230 if parent: 231 qname = head 232 parent = None 233 q = self.import_module(head, qname, parent) 234 if q: 235 self.msgout(4, "find_head_package ->", (q, tail)) 236 return q, tail 237 self.msgout(4, "raise ImportError: No module named", qname) 238 raise ImportError("No module named " + qname) 239 240 def load_tail(self, q, tail): 241 self.msgin(4, "load_tail", q, tail) 242 m = q 243 while tail: 244 i = tail.find('.') 245 if i < 0: i = len(tail) 246 head, tail = tail[:i], tail[i+1:] 247 mname = "%s.%s" % (m.__name__, head) 248 m = self.import_module(head, mname, m) 249 if not m: 250 self.msgout(4, "raise ImportError: No module named", mname) 251 raise ImportError("No module named " + mname) 252 self.msgout(4, "load_tail ->", m) 253 return m 254 255 def ensure_fromlist(self, m, fromlist, recursive=0): 256 self.msg(4, "ensure_fromlist", m, fromlist, recursive) 257 for sub in fromlist: 258 if sub == "*": 259 if not recursive: 260 all = self.find_all_submodules(m) 261 if all: 262 self.ensure_fromlist(m, all, 1) 263 elif not hasattr(m, sub): 264 subname = "%s.%s" % (m.__name__, sub) 265 submod = self.import_module(sub, subname, m) 266 if not submod: 267 raise ImportError("No module named " + subname) 268 269 def find_all_submodules(self, m): 270 if not m.__path__: 271 return 272 modules = {} 273 # 'suffixes' used to be a list hardcoded to [".py", ".pyc"]. 274 # But we must also collect Python extension modules - although 275 # we cannot separate normal dlls from Python extensions. 276 suffixes = [] 277 suffixes += importlib.machinery.EXTENSION_SUFFIXES[:] 278 suffixes += importlib.machinery.SOURCE_SUFFIXES[:] 279 suffixes += importlib.machinery.BYTECODE_SUFFIXES[:] 280 for dir in m.__path__: 281 try: 282 names = os.listdir(dir) 283 except OSError: 284 self.msg(2, "can't list directory", dir) 285 continue 286 for name in names: 287 mod = None 288 for suff in suffixes: 289 n = len(suff) 290 if name[-n:] == suff: 291 mod = name[:-n] 292 break 293 if mod and mod != "__init__": 294 modules[mod] = mod 295 return modules.keys() 296 297 def import_module(self, partname, fqname, parent): 298 self.msgin(3, "import_module", partname, fqname, parent) 299 try: 300 m = self.modules[fqname] 301 except KeyError: 302 pass 303 else: 304 self.msgout(3, "import_module ->", m) 305 return m 306 if fqname in self.badmodules: 307 self.msgout(3, "import_module -> None") 308 return None 309 if parent and parent.__path__ is None: 310 self.msgout(3, "import_module -> None") 311 return None 312 try: 313 fp, pathname, stuff = self.find_module(partname, 314 parent and parent.__path__, parent) 315 except ImportError: 316 self.msgout(3, "import_module ->", None) 317 return None 318 319 try: 320 m = self.load_module(fqname, fp, pathname, stuff) 321 finally: 322 if fp: 323 fp.close() 324 if parent: 325 setattr(parent, partname, m) 326 self.msgout(3, "import_module ->", m) 327 return m 328 329 def load_module(self, fqname, fp, pathname, file_info): 330 suffix, mode, type = file_info 331 self.msgin(2, "load_module", fqname, fp and "fp", pathname) 332 if type == _PKG_DIRECTORY: 333 m = self.load_package(fqname, pathname) 334 self.msgout(2, "load_module ->", m) 335 return m 336 if type == _PY_SOURCE: 337 co = compile(fp.read(), pathname, 'exec') 338 elif type == _PY_COMPILED: 339 try: 340 data = fp.read() 341 importlib._bootstrap_external._classify_pyc(data, fqname, {}) 342 except ImportError as exc: 343 self.msgout(2, "raise ImportError: " + str(exc), pathname) 344 raise 345 co = marshal.loads(memoryview(data)[16:]) 346 else: 347 co = None 348 m = self.add_module(fqname) 349 m.__file__ = pathname 350 if co: 351 if self.replace_paths: 352 co = self.replace_paths_in_code(co) 353 m.__code__ = co 354 self.scan_code(co, m) 355 self.msgout(2, "load_module ->", m) 356 return m 357 358 def _add_badmodule(self, name, caller): 359 if name not in self.badmodules: 360 self.badmodules[name] = {} 361 if caller: 362 self.badmodules[name][caller.__name__] = 1 363 else: 364 self.badmodules[name]["-"] = 1 365 366 def _safe_import_hook(self, name, caller, fromlist, level=-1): 367 # wrapper for self.import_hook() that won't raise ImportError 368 if name in self.badmodules: 369 self._add_badmodule(name, caller) 370 return 371 try: 372 self.import_hook(name, caller, level=level) 373 except ImportError as msg: 374 self.msg(2, "ImportError:", str(msg)) 375 self._add_badmodule(name, caller) 376 except SyntaxError as msg: 377 self.msg(2, "SyntaxError:", str(msg)) 378 self._add_badmodule(name, caller) 379 else: 380 if fromlist: 381 for sub in fromlist: 382 fullname = name + "." + sub 383 if fullname in self.badmodules: 384 self._add_badmodule(fullname, caller) 385 continue 386 try: 387 self.import_hook(name, caller, [sub], level=level) 388 except ImportError as msg: 389 self.msg(2, "ImportError:", str(msg)) 390 self._add_badmodule(fullname, caller) 391 392 def scan_opcodes(self, co): 393 # Scan the code, and yield 'interesting' opcode combinations 394 for name in dis._find_store_names(co): 395 yield "store", (name,) 396 for name, level, fromlist in dis._find_imports(co): 397 if level == 0: # absolute import 398 yield "absolute_import", (fromlist, name) 399 else: # relative import 400 yield "relative_import", (level, fromlist, name) 401 402 def scan_code(self, co, m): 403 code = co.co_code 404 scanner = self.scan_opcodes 405 for what, args in scanner(co): 406 if what == "store": 407 name, = args 408 m.globalnames[name] = 1 409 elif what == "absolute_import": 410 fromlist, name = args 411 have_star = 0 412 if fromlist is not None: 413 if "*" in fromlist: 414 have_star = 1 415 fromlist = [f for f in fromlist if f != "*"] 416 self._safe_import_hook(name, m, fromlist, level=0) 417 if have_star: 418 # We've encountered an "import *". If it is a Python module, 419 # the code has already been parsed and we can suck out the 420 # global names. 421 mm = None 422 if m.__path__: 423 # At this point we don't know whether 'name' is a 424 # submodule of 'm' or a global module. Let's just try 425 # the full name first. 426 mm = self.modules.get(m.__name__ + "." + name) 427 if mm is None: 428 mm = self.modules.get(name) 429 if mm is not None: 430 m.globalnames.update(mm.globalnames) 431 m.starimports.update(mm.starimports) 432 if mm.__code__ is None: 433 m.starimports[name] = 1 434 else: 435 m.starimports[name] = 1 436 elif what == "relative_import": 437 level, fromlist, name = args 438 if name: 439 self._safe_import_hook(name, m, fromlist, level=level) 440 else: 441 parent = self.determine_parent(m, level=level) 442 self._safe_import_hook(parent.__name__, None, fromlist, level=0) 443 else: 444 # We don't expect anything else from the generator. 445 raise RuntimeError(what) 446 447 for c in co.co_consts: 448 if isinstance(c, type(co)): 449 self.scan_code(c, m) 450 451 def load_package(self, fqname, pathname): 452 self.msgin(2, "load_package", fqname, pathname) 453 newname = replacePackageMap.get(fqname) 454 if newname: 455 fqname = newname 456 m = self.add_module(fqname) 457 m.__file__ = pathname 458 m.__path__ = [pathname] 459 460 # As per comment at top of file, simulate runtime __path__ additions. 461 m.__path__ = m.__path__ + packagePathMap.get(fqname, []) 462 463 fp, buf, stuff = self.find_module("__init__", m.__path__) 464 try: 465 self.load_module(fqname, fp, buf, stuff) 466 self.msgout(2, "load_package ->", m) 467 return m 468 finally: 469 if fp: 470 fp.close() 471 472 def add_module(self, fqname): 473 if fqname in self.modules: 474 return self.modules[fqname] 475 self.modules[fqname] = m = Module(fqname) 476 return m 477 478 def find_module(self, name, path, parent=None): 479 if parent is not None: 480 # assert path is not None 481 fullname = parent.__name__+'.'+name 482 else: 483 fullname = name 484 if fullname in self.excludes: 485 self.msgout(3, "find_module -> Excluded", fullname) 486 raise ImportError(name) 487 488 if path is None: 489 if name in sys.builtin_module_names: 490 return (None, None, ("", "", _C_BUILTIN)) 491 492 path = self.path 493 494 return _find_module(name, path) 495 496 def report(self): 497 """Print a report to stdout, listing the found modules with their 498 paths, as well as modules that are missing, or seem to be missing. 499 """ 500 print() 501 print(" %-25s %s" % ("Name", "File")) 502 print(" %-25s %s" % ("----", "----")) 503 # Print modules found 504 keys = sorted(self.modules.keys()) 505 for key in keys: 506 m = self.modules[key] 507 if m.__path__: 508 print("P", end=' ') 509 else: 510 print("m", end=' ') 511 print("%-25s" % key, m.__file__ or "") 512 513 # Print missing modules 514 missing, maybe = self.any_missing_maybe() 515 if missing: 516 print() 517 print("Missing modules:") 518 for name in missing: 519 mods = sorted(self.badmodules[name].keys()) 520 print("?", name, "imported from", ', '.join(mods)) 521 # Print modules that may be missing, but then again, maybe not... 522 if maybe: 523 print() 524 print("Submodules that appear to be missing, but could also be", end=' ') 525 print("global names in the parent package:") 526 for name in maybe: 527 mods = sorted(self.badmodules[name].keys()) 528 print("?", name, "imported from", ', '.join(mods)) 529 530 def any_missing(self): 531 """Return a list of modules that appear to be missing. Use 532 any_missing_maybe() if you want to know which modules are 533 certain to be missing, and which *may* be missing. 534 """ 535 missing, maybe = self.any_missing_maybe() 536 return missing + maybe 537 538 def any_missing_maybe(self): 539 """Return two lists, one with modules that are certainly missing 540 and one with modules that *may* be missing. The latter names could 541 either be submodules *or* just global names in the package. 542 543 The reason it can't always be determined is that it's impossible to 544 tell which names are imported when "from module import *" is done 545 with an extension module, short of actually importing it. 546 """ 547 missing = [] 548 maybe = [] 549 for name in self.badmodules: 550 if name in self.excludes: 551 continue 552 i = name.rfind(".") 553 if i < 0: 554 missing.append(name) 555 continue 556 subname = name[i+1:] 557 pkgname = name[:i] 558 pkg = self.modules.get(pkgname) 559 if pkg is not None: 560 if pkgname in self.badmodules[name]: 561 # The package tried to import this module itself and 562 # failed. It's definitely missing. 563 missing.append(name) 564 elif subname in pkg.globalnames: 565 # It's a global in the package: definitely not missing. 566 pass 567 elif pkg.starimports: 568 # It could be missing, but the package did an "import *" 569 # from a non-Python module, so we simply can't be sure. 570 maybe.append(name) 571 else: 572 # It's not a global in the package, the package didn't 573 # do funny star imports, it's very likely to be missing. 574 # The symbol could be inserted into the package from the 575 # outside, but since that's not good style we simply list 576 # it missing. 577 missing.append(name) 578 else: 579 missing.append(name) 580 missing.sort() 581 maybe.sort() 582 return missing, maybe 583 584 def replace_paths_in_code(self, co): 585 new_filename = original_filename = os.path.normpath(co.co_filename) 586 for f, r in self.replace_paths: 587 if original_filename.startswith(f): 588 new_filename = r + original_filename[len(f):] 589 break 590 591 if self.debug and original_filename not in self.processed_paths: 592 if new_filename != original_filename: 593 self.msgout(2, "co_filename %r changed to %r" \ 594 % (original_filename,new_filename,)) 595 else: 596 self.msgout(2, "co_filename %r remains unchanged" \ 597 % (original_filename,)) 598 self.processed_paths.append(original_filename) 599 600 consts = list(co.co_consts) 601 for i in range(len(consts)): 602 if isinstance(consts[i], type(co)): 603 consts[i] = self.replace_paths_in_code(consts[i]) 604 605 return co.replace(co_consts=tuple(consts), co_filename=new_filename) 606 607 608def test(): 609 # Parse command line 610 import getopt 611 try: 612 opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") 613 except getopt.error as msg: 614 print(msg) 615 return 616 617 # Process options 618 debug = 1 619 domods = 0 620 addpath = [] 621 exclude = [] 622 for o, a in opts: 623 if o == '-d': 624 debug = debug + 1 625 if o == '-m': 626 domods = 1 627 if o == '-p': 628 addpath = addpath + a.split(os.pathsep) 629 if o == '-q': 630 debug = 0 631 if o == '-x': 632 exclude.append(a) 633 634 # Provide default arguments 635 if not args: 636 script = "hello.py" 637 else: 638 script = args[0] 639 640 # Set the path based on sys.path and the script directory 641 path = sys.path[:] 642 path[0] = os.path.dirname(script) 643 path = addpath + path 644 if debug > 1: 645 print("path:") 646 for item in path: 647 print(" ", repr(item)) 648 649 # Create the module finder and turn its crank 650 mf = ModuleFinder(path, debug, exclude) 651 for arg in args[1:]: 652 if arg == '-m': 653 domods = 1 654 continue 655 if domods: 656 if arg[-2:] == '.*': 657 mf.import_hook(arg[:-2], None, ["*"]) 658 else: 659 mf.import_hook(arg) 660 else: 661 mf.load_file(arg) 662 mf.run_script(script) 663 mf.report() 664 return mf # for -i debugging 665 666 667if __name__ == '__main__': 668 try: 669 mf = test() 670 except KeyboardInterrupt: 671 print("\n[interrupted]") 672