"""Find modules used by a script, using introspection."""

import dis
import importlib._bootstrap_external
import importlib.machinery
import marshal
import os
import io
import sys


LOAD_CONST = dis.opmap['LOAD_CONST']
IMPORT_NAME = dis.opmap['IMPORT_NAME']
STORE_NAME = dis.opmap['STORE_NAME']
STORE_GLOBAL = dis.opmap['STORE_GLOBAL']
STORE_OPS = STORE_NAME, STORE_GLOBAL
EXTENDED_ARG = dis.EXTENDED_ARG

# Old imp constants:

_SEARCH_ERROR = 0
_PY_SOURCE = 1
_PY_COMPILED = 2
_C_EXTENSION = 3
_PKG_DIRECTORY = 5
_C_BUILTIN = 6
_PY_FROZEN = 7

# Modulefinder does a good job at simulating Python's import machinery,
# but it cannot handle __path__ modifications packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and it will be honored.

# Note this is a mapping from package names to lists of paths.
packagePathMap = {}


# A Public interface
def AddPackagePath(packagename, path):
    """Register *path* as an extra ``__path__`` entry for *packagename*.

    The entry is appended to ``packagePathMap[packagename]`` and is added
    to the package's ``__path__`` by ``ModuleFinder.load_package``.
    """
    packagePathMap.setdefault(packagename, []).append(path)


replacePackageMap = {}

# This ReplacePackage mechanism allows modulefinder to work around
# situations in which a package injects itself under the name
# of another package into sys.modules at runtime by calling
# ReplacePackage("real_package_name", "faked_package_name")
# before running ModuleFinder.


def ReplacePackage(oldname, newname):
    """Make ``ModuleFinder.load_package`` record *oldname* as *newname*."""
    replacePackageMap[oldname] = newname


def _find_module(name, path=None):
    """An importlib reimplementation of imp.find_module (for our purposes).

    Returns a triple ``(file, pathname, (suffix, mode, kind))`` where *kind*
    is one of the ``_PY_SOURCE``-style constants defined in this module.
    *file* is an open binary file for source, bytecode and extension
    modules and ``None`` otherwise; the caller is responsible for closing it.

    Raises ImportError when no spec is found for *name*, or when the spec
    describes a namespace package (no ``__init__`` file to scan).
    """

    # It's necessary to clear the caches for our Finder first, in case any
    # modules are being added/deleted/modified at runtime. In particular,
    # test_modulefinder.py changes file tree contents in a cache-breaking way:

    importlib.machinery.PathFinder.invalidate_caches()

    spec = importlib.machinery.PathFinder.find_spec(name, path)

    if spec is None:
        raise ImportError("No module named {name!r}".format(name=name), name=name)

    # Some special cases:

    if spec.loader is importlib.machinery.BuiltinImporter:
        return None, None, ("", "", _C_BUILTIN)

    if spec.loader is importlib.machinery.FrozenImporter:
        return None, None, ("", "", _PY_FROZEN)

    file_path = spec.origin

    # A namespace package has no origin file (and may have no loader at
    # all).  Calling spec.loader.is_package() or os.path.dirname(None) on
    # it would raise AttributeError/TypeError, which callers do not catch
    # and which would abort the whole scan.  Report it the way every other
    # unresolvable module is reported instead.
    if spec.loader is None or file_path is None:
        raise ImportError(
            "No module named {name!r} (namespace packages are not "
            "supported)".format(name=name), name=name)

    if spec.loader.is_package(name):
        return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY)

    if isinstance(spec.loader, importlib.machinery.SourceFileLoader):
        kind = _PY_SOURCE

    elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader):
        kind = _C_EXTENSION

    elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader):
        kind = _PY_COMPILED

    else:  # Should never happen.
        return None, None, ("", "", _SEARCH_ERROR)

    fp = io.open_code(file_path)
    suffix = os.path.splitext(file_path)[-1]

    return fp, file_path, (suffix, "rb", kind)


class Module:
    """Record of a single module known to a ModuleFinder."""

    def __init__(self, name, file=None, path=None):
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None
        # The set of global names that are assigned to in the module
        # (stored as the keys of a dict).  This includes those names
        # imported through starimports of Python modules.
        self.globalnames = {}
        # The set of starimports this module did that could not be
        # resolved, ie. a starimport from a non-Python module.
        self.starimports = {}

    def __repr__(self):
        s = "Module(%r" % (self.__name__,)
        if self.__file__ is not None:
            s = s + ", %r" % (self.__file__,)
        if self.__path__ is not None:
            s = s + ", %r" % (self.__path__,)
        s = s + ")"
        return s


class ModuleFinder:
    """Collect the modules imported by a script without executing it.

    Found modules are stored in ``self.modules`` (name -> Module); imports
    that could not be resolved are stored in ``self.badmodules``
    (name -> {importer name: 1}).
    """

    def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
        """Create a finder.

        path          -- directories to search; defaults to ``sys.path``.
        debug         -- verbosity threshold for ``msg`` output.
        excludes      -- fully qualified module names to treat as missing.
        replace_paths -- ``(old_prefix, new_prefix)`` pairs applied to
                         ``co_filename`` of scanned code objects.
        """
        if path is None:
            path = sys.path
        self.path = path
        self.modules = {}
        self.badmodules = {}
        self.debug = debug
        self.indent = 0
        self.excludes = excludes if excludes is not None else []
        self.replace_paths = replace_paths if replace_paths is not None else []
        self.processed_paths = []   # Used in debugging only

    def msg(self, level, str, *args):
        """Print a debug message when *level* <= ``self.debug``.

        The parameter name ``str`` shadows the builtin; it is kept so that
        existing keyword callers continue to work.
        """
        if level <= self.debug:
            for _ in range(self.indent):
                print(" ", end=' ')
            print(str, end=' ')
            for arg in args:
                print(repr(arg), end=' ')
            print()

    def msgin(self, *args):
        """Like ``msg``, but increase the indentation level first."""
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent + 1
            self.msg(*args)

    def msgout(self, *args):
        """Like ``msg``, but decrease the indentation level first."""
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent - 1
            self.msg(*args)

    def run_script(self, pathname):
        """Load the file *pathname* as module ``__main__`` and scan it."""
        self.msg(2, "run_script", pathname)
        with io.open_code(pathname) as fp:
            stuff = ("", "rb", _PY_SOURCE)
            self.load_module('__main__', fp, pathname, stuff)

    def load_file(self, pathname):
        """Load the source file *pathname* under its base name and scan it."""
        name, ext = os.path.splitext(os.path.basename(pathname))
        with io.open_code(pathname) as fp:
            stuff = (ext, "rb", _PY_SOURCE)
            self.load_module(name, fp, pathname, stuff)

    def import_hook(self, name, caller=None, fromlist=None, level=-1):
        """Simulate an import statement issued by *caller*.

        Returns the head package's Module when *fromlist* is empty and
        ``None`` otherwise.  Raises ImportError when a component of *name*
        (or an entry of *fromlist*) cannot be found.
        """
        self.msg(3, "import_hook", name, caller, fromlist, level)
        parent = self.determine_parent(caller, level=level)
        q, tail = self.find_head_package(parent, name)
        m = self.load_tail(q, tail)
        if not fromlist:
            return q
        if m.__path__:
            self.ensure_fromlist(m, fromlist)
        return None

    def determine_parent(self, caller, level=-1):
        """Return the package Module that an import in *caller* is relative to.

        Returns ``None`` for absolute imports (``level == 0``), when there
        is no caller, or when the caller is a top-level non-package module
        doing an old-style import.  Raises ImportError when a relative
        import reaches above the top-level package.  Parents are looked up
        in ``self.modules``.
        """
        self.msgin(4, "determine_parent", caller, level)
        if not caller or level == 0:
            self.msgout(4, "determine_parent -> None")
            return None
        pname = caller.__name__
        if level >= 1:  # relative import
            if caller.__path__:
                # A package counts as its own first level.
                level -= 1
            if level == 0:
                parent = self.modules[pname]
                assert parent is caller
                self.msgout(4, "determine_parent ->", parent)
                return parent
            if pname.count(".") < level:
                # Balance the msgin above before leaving, as the other
                # raising helpers in this class do.
                self.msgout(4, "raise ImportError: relative importpath too deep")
                raise ImportError("relative importpath too deep")
            pname = ".".join(pname.split(".")[:-level])
            parent = self.modules[pname]
            self.msgout(4, "determine_parent ->", parent)
            return parent
        if caller.__path__:
            parent = self.modules[pname]
            assert caller is parent
            self.msgout(4, "determine_parent ->", parent)
            return parent
        if '.' in pname:
            i = pname.rfind('.')
            pname = pname[:i]
            parent = self.modules[pname]
            assert parent.__name__ == pname
            self.msgout(4, "determine_parent ->", parent)
            return parent
        self.msgout(4, "determine_parent -> None")
        return None

    def find_head_package(self, parent, name):
        """Import the first dotted component of *name*.

        The component is first tried relative to *parent* (if any) and then
        as a top-level module.  Returns ``(module, remaining_dotted_tail)``
        or raises ImportError.
        """
        self.msgin(4, "find_head_package", parent, name)
        head, _, tail = name.partition('.')
        if parent:
            qname = "%s.%s" % (parent.__name__, head)
        else:
            qname = head
        q = self.import_module(head, qname, parent)
        if q:
            self.msgout(4, "find_head_package ->", (q, tail))
            return q, tail
        if parent:
            qname = head
            parent = None
            q = self.import_module(head, qname, parent)
            if q:
                self.msgout(4, "find_head_package ->", (q, tail))
                return q, tail
        self.msgout(4, "raise ImportError: No module named", qname)
        raise ImportError("No module named " + qname)

    def load_tail(self, q, tail):
        """Import each remaining dotted component of *tail* below module *q*.

        Returns the innermost Module; raises ImportError when a component
        cannot be imported.
        """
        self.msgin(4, "load_tail", q, tail)
        m = q
        while tail:
            head, _, tail = tail.partition('.')
            mname = "%s.%s" % (m.__name__, head)
            m = self.import_module(head, mname, m)
            if not m:
                self.msgout(4, "raise ImportError: No module named", mname)
                raise ImportError("No module named " + mname)
        self.msgout(4, "load_tail ->", m)
        return m

    def ensure_fromlist(self, m, fromlist, recursive=0):
        """Import every name in *fromlist* that is a submodule of package *m*.

        ``"*"`` is expanded (once, not recursively) to the submodules found
        on disk by ``find_all_submodules``.  Raises ImportError for a name
        that is neither an attribute set on *m* nor an importable submodule.
        """
        self.msg(4, "ensure_fromlist", m, fromlist, recursive)
        for sub in fromlist:
            if sub == "*":
                if not recursive:
                    submodules = self.find_all_submodules(m)
                    if submodules:
                        self.ensure_fromlist(m, submodules, 1)
            elif not hasattr(m, sub):
                subname = "%s.%s" % (m.__name__, sub)
                submod = self.import_module(sub, subname, m)
                if not submod:
                    raise ImportError("No module named " + subname)

    def find_all_submodules(self, m):
        """Return the names of candidate submodules in package *m*'s dirs.

        Returns ``None`` when *m* has no ``__path__``; otherwise a view of
        the base names of all files carrying a module suffix, excluding
        ``__init__``.
        """
        if not m.__path__:
            return
        modules = {}
        # 'suffixes' used to be a list hardcoded to [".py", ".pyc"].
        # But we must also collect Python extension modules - although
        # we cannot separate normal dlls from Python extensions.
        suffixes = []
        suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
        suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
        suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
        for dirname in m.__path__:
            try:
                names = os.listdir(dirname)
            except OSError:
                self.msg(2, "can't list directory", dirname)
                continue
            for name in names:
                mod = None
                for suff in suffixes:
                    if name.endswith(suff):
                        mod = name[:-len(suff)]
                        break
                if mod and mod != "__init__":
                    modules[mod] = mod
        return modules.keys()

    def import_module(self, partname, fqname, parent):
        """Find and load module *fqname*, returning its Module or ``None``.

        *partname* is the last component of *fqname* and *parent* the
        enclosing package Module (or ``None``).  Already loaded modules are
        returned from ``self.modules``; names in ``self.badmodules`` and
        modules that cannot be located yield ``None``.  On success the
        module is also set as attribute *partname* on *parent*.
        """
        self.msgin(3, "import_module", partname, fqname, parent)
        try:
            m = self.modules[fqname]
        except KeyError:
            pass
        else:
            self.msgout(3, "import_module ->", m)
            return m
        if fqname in self.badmodules:
            self.msgout(3, "import_module -> None")
            return None
        if parent and parent.__path__ is None:
            # A plain module cannot contain submodules.
            self.msgout(3, "import_module -> None")
            return None
        try:
            fp, pathname, stuff = self.find_module(partname,
                                                   parent and parent.__path__, parent)
        except ImportError:
            self.msgout(3, "import_module ->", None)
            return None

        try:
            m = self.load_module(fqname, fp, pathname, stuff)
        finally:
            if fp:
                fp.close()
        if parent:
            setattr(parent, partname, m)
        self.msgout(3, "import_module ->", m)
        return m

    def load_module(self, fqname, fp, pathname, file_info):
        """Register module *fqname* and scan its code for further imports.

        *file_info* is the ``(suffix, mode, kind)`` triple produced by
        ``find_module``.  Packages are delegated to ``load_package``;
        source is compiled, bytecode is validated and unmarshalled, and any
        other kind is registered without code.  Returns the Module.
        """
        _suffix, _mode, kind = file_info
        self.msgin(2, "load_module", fqname, fp and "fp", pathname)
        if kind == _PKG_DIRECTORY:
            m = self.load_package(fqname, pathname)
            self.msgout(2, "load_module ->", m)
            return m
        if kind == _PY_SOURCE:
            co = compile(fp.read(), pathname, 'exec')
        elif kind == _PY_COMPILED:
            try:
                data = fp.read()
                importlib._bootstrap_external._classify_pyc(data, fqname, {})
            except ImportError as exc:
                self.msgout(2, "raise ImportError: " + str(exc), pathname)
                raise
            # Skip the 16-byte header that precedes the marshalled code.
            co = marshal.loads(memoryview(data)[16:])
        else:
            co = None
        m = self.add_module(fqname)
        m.__file__ = pathname
        if co:
            if self.replace_paths:
                co = self.replace_paths_in_code(co)
            m.__code__ = co
            self.scan_code(co, m)
        self.msgout(2, "load_module ->", m)
        return m

    def _add_badmodule(self, name, caller):
        """Record that *caller* (or ``"-"`` if none) failed to import *name*."""
        if name not in self.badmodules:
            self.badmodules[name] = {}
        if caller:
            self.badmodules[name][caller.__name__] = 1
        else:
            self.badmodules[name]["-"] = 1

    def _safe_import_hook(self, name, caller, fromlist, level=-1):
        """Wrapper for ``import_hook`` that records failures instead of raising.

        ImportError and SyntaxError for *name* itself, and ImportError for
        each entry of *fromlist*, are logged and stored in
        ``self.badmodules``.
        """
        # wrapper for self.import_hook() that won't raise ImportError
        if name in self.badmodules:
            self._add_badmodule(name, caller)
            return
        try:
            self.import_hook(name, caller, level=level)
        except ImportError as msg:
            self.msg(2, "ImportError:", str(msg))
            self._add_badmodule(name, caller)
        except SyntaxError as msg:
            self.msg(2, "SyntaxError:", str(msg))
            self._add_badmodule(name, caller)
        else:
            if fromlist:
                for sub in fromlist:
                    fullname = name + "." + sub
                    if fullname in self.badmodules:
                        self._add_badmodule(fullname, caller)
                        continue
                    try:
                        self.import_hook(name, caller, [sub], level=level)
                    except ImportError as msg:
                        self.msg(2, "ImportError:", str(msg))
                        self._add_badmodule(fullname, caller)

    def scan_opcodes(self, co):
        """Yield the 'interesting' operations found in code object *co*.

        Yields ``("store", (name,))`` for global/name stores,
        ``("absolute_import", (fromlist, name))`` and
        ``("relative_import", (level, fromlist, name))``.

        When the running interpreter's ``dis`` module provides the
        ``_find_store_names``/``_find_imports`` helpers they are used, so
        that the scan follows that interpreter's own bytecode layout; in
        that case all stores are yielded before all imports.  Otherwise
        the code falls back to matching LOAD_CONST, LOAD_CONST, IMPORT_NAME
        sequences by hand.
        """
        find_stores = getattr(dis, "_find_store_names", None)
        find_imports = getattr(dis, "_find_imports", None)
        if find_stores is not None and find_imports is not None:
            for name in find_stores(co):
                yield "store", (name,)
            for name, level, fromlist in find_imports(co):
                if level == 0:  # absolute import
                    yield "absolute_import", (fromlist, name)
                else:  # relative import
                    yield "relative_import", (level, fromlist, name)
            return

        # Fallback: scan the bytecode ourselves.
        code = co.co_code
        names = co.co_names
        consts = co.co_consts
        opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
                  if op != EXTENDED_ARG]
        for i, (op, oparg) in enumerate(opargs):
            if op in STORE_OPS:
                yield "store", (names[oparg],)
                continue
            if (op == IMPORT_NAME and i >= 2
                    and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
                level = consts[opargs[i-2][1]]
                fromlist = consts[opargs[i-1][1]]
                if level == 0:  # absolute import
                    yield "absolute_import", (fromlist, names[oparg])
                else:  # relative import
                    yield "relative_import", (level, fromlist, names[oparg])
                continue

    def scan_code(self, co, m):
        """Process the stores and imports of *co* on behalf of module *m*.

        Global names are recorded in ``m.globalnames``, imports are followed
        through ``_safe_import_hook``, and nested code objects found in
        ``co.co_consts`` are scanned recursively.
        """
        for what, args in self.scan_opcodes(co):
            if what == "store":
                name, = args
                m.globalnames[name] = 1
            elif what == "absolute_import":
                fromlist, name = args
                have_star = 0
                if fromlist is not None:
                    if "*" in fromlist:
                        have_star = 1
                    fromlist = [f for f in fromlist if f != "*"]
                self._safe_import_hook(name, m, fromlist, level=0)
                if have_star:
                    # We've encountered an "import *". If it is a Python module,
                    # the code has already been parsed and we can suck out the
                    # global names.
                    mm = None
                    if m.__path__:
                        # At this point we don't know whether 'name' is a
                        # submodule of 'm' or a global module. Let's just try
                        # the full name first.
                        mm = self.modules.get(m.__name__ + "." + name)
                    if mm is None:
                        mm = self.modules.get(name)
                    if mm is not None:
                        m.globalnames.update(mm.globalnames)
                        m.starimports.update(mm.starimports)
                        if mm.__code__ is None:
                            m.starimports[name] = 1
                    else:
                        m.starimports[name] = 1
            elif what == "relative_import":
                level, fromlist, name = args
                if name:
                    self._safe_import_hook(name, m, fromlist, level=level)
                else:
                    # "from . import x": the package itself must be resolved
                    # first.  determine_parent() raises ImportError when the
                    # import reaches above the top level (e.g. in a script);
                    # record that like any other failed import instead of
                    # letting it abort the scan.
                    try:
                        parent = self.determine_parent(m, level=level)
                    except ImportError as exc:
                        self.msg(2, "ImportError:", str(exc))
                        self._add_badmodule("." * level, m)
                    else:
                        self._safe_import_hook(parent.__name__, None,
                                               fromlist, level=0)
            else:
                # We don't expect anything else from the generator.
                raise RuntimeError(what)

        for c in co.co_consts:
            if isinstance(c, type(co)):
                self.scan_code(c, m)

    def load_package(self, fqname, pathname):
        """Register package *fqname* located in directory *pathname*.

        Honors ``replacePackageMap`` and ``packagePathMap`` and then loads
        the package's ``__init__`` module.  Raises ImportError when no
        ``__init__`` can be found.  Returns the package Module.
        """
        self.msgin(2, "load_package", fqname, pathname)
        newname = replacePackageMap.get(fqname)
        if newname:
            fqname = newname
        m = self.add_module(fqname)
        m.__file__ = pathname
        m.__path__ = [pathname]

        # As per comment at top of file, simulate runtime __path__ additions.
        m.__path__ = m.__path__ + packagePathMap.get(fqname, [])

        fp, buf, stuff = self.find_module("__init__", m.__path__)
        try:
            self.load_module(fqname, fp, buf, stuff)
            self.msgout(2, "load_package ->", m)
            return m
        finally:
            if fp:
                fp.close()

    def add_module(self, fqname):
        """Return the Module registered as *fqname*, creating it if needed."""
        if fqname in self.modules:
            return self.modules[fqname]
        self.modules[fqname] = m = Module(fqname)
        return m

    def find_module(self, name, path, parent=None):
        """Locate module *name* and return ``(file, pathname, file_info)``.

        *path* is the list of directories to search, or ``None`` to check
        the builtin modules and then ``self.path``.  Raises ImportError for
        names listed in ``self.excludes`` and for modules that cannot be
        found.
        """
        if parent is not None:
            # assert path is not None
            fullname = parent.__name__+'.'+name
        else:
            fullname = name
        if fullname in self.excludes:
            self.msgout(3, "find_module -> Excluded", fullname)
            raise ImportError(name)

        if path is None:
            if name in sys.builtin_module_names:
                return (None, None, ("", "", _C_BUILTIN))

            path = self.path

        return _find_module(name, path)

    def report(self):
        """Print a report to stdout, listing the found modules with their
        paths, as well as modules that are missing, or seem to be missing.
        """
        print()
        # Two leading spaces so the header lines up with the two-character
        # "P " / "m " prefix printed in front of every row below.
        print("  %-25s %s" % ("Name", "File"))
        print("  %-25s %s" % ("----", "----"))
        # Print modules found
        keys = sorted(self.modules.keys())
        for key in keys:
            m = self.modules[key]
            if m.__path__:
                print("P", end=' ')
            else:
                print("m", end=' ')
            print("%-25s" % key, m.__file__ or "")

        # Print missing modules
        missing, maybe = self.any_missing_maybe()
        if missing:
            print()
            print("Missing modules:")
            for name in missing:
                mods = sorted(self.badmodules[name].keys())
                print("?", name, "imported from", ', '.join(mods))
        # Print modules that may be missing, but then again, maybe not...
        if maybe:
            print()
            print("Submodules that appear to be missing, but could also be", end=' ')
            print("global names in the parent package:")
            for name in maybe:
                mods = sorted(self.badmodules[name].keys())
                print("?", name, "imported from", ', '.join(mods))

    def any_missing(self):
        """Return a list of modules that appear to be missing. Use
        any_missing_maybe() if you want to know which modules are
        certain to be missing, and which *may* be missing.
        """
        missing, maybe = self.any_missing_maybe()
        return missing + maybe

    def any_missing_maybe(self):
        """Return two lists, one with modules that are certainly missing
        and one with modules that *may* be missing. The latter names could
        either be submodules *or* just global names in the package.

        The reason it can't always be determined is that it's impossible to
        tell which names are imported when "from module import *" is done
        with an extension module, short of actually importing it.
        """
        missing = []
        maybe = []
        for name in self.badmodules:
            if name in self.excludes:
                continue
            i = name.rfind(".")
            if i < 0:
                missing.append(name)
                continue
            subname = name[i+1:]
            pkgname = name[:i]
            pkg = self.modules.get(pkgname)
            if pkg is not None:
                if pkgname in self.badmodules[name]:
                    # The package tried to import this module itself and
                    # failed. It's definitely missing.
                    missing.append(name)
                elif subname in pkg.globalnames:
                    # It's a global in the package: definitely not missing.
                    pass
                elif pkg.starimports:
                    # It could be missing, but the package did an "import *"
                    # from a non-Python module, so we simply can't be sure.
                    maybe.append(name)
                else:
                    # It's not a global in the package, the package didn't
                    # do funny star imports, it's very likely to be missing.
                    # The symbol could be inserted into the package from the
                    # outside, but since that's not good style we simply list
                    # it missing.
                    missing.append(name)
            else:
                missing.append(name)
        missing.sort()
        maybe.sort()
        return missing, maybe

    def replace_paths_in_code(self, co):
        """Return a copy of *co* with ``co_filename`` prefixes rewritten.

        The first ``(old, new)`` pair of ``self.replace_paths`` whose *old*
        is a prefix of the normalized filename is applied.  Nested code
        objects in ``co.co_consts`` are rewritten recursively.
        """
        new_filename = original_filename = os.path.normpath(co.co_filename)
        for f, r in self.replace_paths:
            if original_filename.startswith(f):
                new_filename = r + original_filename[len(f):]
                break

        if self.debug and original_filename not in self.processed_paths:
            if new_filename != original_filename:
                self.msgout(2, "co_filename %r changed to %r"
                               % (original_filename, new_filename,))
            else:
                self.msgout(2, "co_filename %r remains unchanged"
                               % (original_filename,))
            self.processed_paths.append(original_filename)

        consts = list(co.co_consts)
        for i, const in enumerate(consts):
            if isinstance(const, type(co)):
                consts[i] = self.replace_paths_in_code(const)

        return co.replace(co_consts=tuple(consts), co_filename=new_filename)


def test():
    """Command-line driver: scan a script and print a report.

    Options: ``-d`` raise verbosity, ``-q`` silence debug output, ``-m``
    treat following arguments as module names, ``-p PATH`` prepend
    ``os.pathsep``-separated directories to the search path, ``-x NAME``
    exclude a module.  Returns the ModuleFinder (``None`` on a usage error).
    """
    # Parse command line
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error as msg:
        print(msg)
        return

    # Process options
    debug = 1
    domods = 0
    addpath = []
    exclude = []
    for o, a in opts:
        if o == '-d':
            debug = debug + 1
        if o == '-m':
            domods = 1
        if o == '-p':
            addpath = addpath + a.split(os.pathsep)
        if o == '-q':
            debug = 0
        if o == '-x':
            exclude.append(a)

    # Provide default arguments
    if not args:
        script = "hello.py"
    else:
        script = args[0]

    # Set the path based on sys.path and the script directory
    path = sys.path[:]
    path[0] = os.path.dirname(script)
    path = addpath + path
    if debug > 1:
        print("path:")
        for item in path:
            print(" ", repr(item))

    # Create the module finder and turn its crank
    mf = ModuleFinder(path, debug, exclude)
    for arg in args[1:]:
        if arg == '-m':
            domods = 1
            continue
        if domods:
            if arg[-2:] == '.*':
                mf.import_hook(arg[:-2], None, ["*"])
            else:
                mf.import_hook(arg)
        else:
            mf.load_file(arg)
    mf.run_script(script)
    mf.report()
    return mf  # for -i debugging


# test() is a command-line driver, not a unit test: keep name-based test
# collectors (pytest honours __test__) from running it when this module's
# names are star-imported into a test file.
test.__test__ = False


if __name__ == '__main__':
    try:
        mf = test()
    except KeyboardInterrupt:
        print("\n[interrupted]")