1"""Find modules used by a script, using introspection.""" 2 3import dis 4import importlib._bootstrap_external 5import importlib.machinery 6import marshal 7import os 8import sys 9import types 10import warnings 11with warnings.catch_warnings(): 12 warnings.simplefilter('ignore', DeprecationWarning) 13 import imp 14 15LOAD_CONST = dis.opmap['LOAD_CONST'] 16IMPORT_NAME = dis.opmap['IMPORT_NAME'] 17STORE_NAME = dis.opmap['STORE_NAME'] 18STORE_GLOBAL = dis.opmap['STORE_GLOBAL'] 19STORE_OPS = STORE_NAME, STORE_GLOBAL 20EXTENDED_ARG = dis.EXTENDED_ARG 21 22# Modulefinder does a good job at simulating Python's, but it can not 23# handle __path__ modifications packages make at runtime. Therefore there 24# is a mechanism whereby you can register extra paths in this map for a 25# package, and it will be honored. 26 27# Note this is a mapping is lists of paths. 28packagePathMap = {} 29 30# A Public interface 31def AddPackagePath(packagename, path): 32 packagePathMap.setdefault(packagename, []).append(path) 33 34replacePackageMap = {} 35 36# This ReplacePackage mechanism allows modulefinder to work around 37# situations in which a package injects itself under the name 38# of another package into sys.modules at runtime by calling 39# ReplacePackage("real_package_name", "faked_package_name") 40# before running ModuleFinder. 41 42def ReplacePackage(oldname, newname): 43 replacePackageMap[oldname] = newname 44 45 46class Module: 47 48 def __init__(self, name, file=None, path=None): 49 self.__name__ = name 50 self.__file__ = file 51 self.__path__ = path 52 self.__code__ = None 53 # The set of global names that are assigned to in the module. 54 # This includes those names imported through starimports of 55 # Python modules. 56 self.globalnames = {} 57 # The set of starimports this module did that could not be 58 # resolved, ie. a starimport from a non-Python module. 59 self.starimports = {} 60 61 def __repr__(self): 62 s = "Module(%r" % (self.__name__,) 63 if self.__file__ is not None: 64 s = s + ", %r" % (self.__file__,) 65 if self.__path__ is not None: 66 s = s + ", %r" % (self.__path__,) 67 s = s + ")" 68 return s 69 70class ModuleFinder: 71 72 def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]): 73 if path is None: 74 path = sys.path 75 self.path = path 76 self.modules = {} 77 self.badmodules = {} 78 self.debug = debug 79 self.indent = 0 80 self.excludes = excludes 81 self.replace_paths = replace_paths 82 self.processed_paths = [] # Used in debugging only 83 84 def msg(self, level, str, *args): 85 if level <= self.debug: 86 for i in range(self.indent): 87 print(" ", end=' ') 88 print(str, end=' ') 89 for arg in args: 90 print(repr(arg), end=' ') 91 print() 92 93 def msgin(self, *args): 94 level = args[0] 95 if level <= self.debug: 96 self.indent = self.indent + 1 97 self.msg(*args) 98 99 def msgout(self, *args): 100 level = args[0] 101 if level <= self.debug: 102 self.indent = self.indent - 1 103 self.msg(*args) 104 105 def run_script(self, pathname): 106 self.msg(2, "run_script", pathname) 107 with open(pathname) as fp: 108 stuff = ("", "r", imp.PY_SOURCE) 109 self.load_module('__main__', fp, pathname, stuff) 110 111 def load_file(self, pathname): 112 dir, name = os.path.split(pathname) 113 name, ext = os.path.splitext(name) 114 with open(pathname) as fp: 115 stuff = (ext, "r", imp.PY_SOURCE) 116 self.load_module(name, fp, pathname, stuff) 117 118 def import_hook(self, name, caller=None, fromlist=None, level=-1): 119 self.msg(3, "import_hook", name, caller, fromlist, level) 120 parent = self.determine_parent(caller, level=level) 121 q, tail = self.find_head_package(parent, name) 122 m = self.load_tail(q, tail) 123 if not fromlist: 124 return q 125 if m.__path__: 126 self.ensure_fromlist(m, fromlist) 127 return None 128 129 def determine_parent(self, caller, level=-1): 130 self.msgin(4, "determine_parent", caller, level) 131 if not caller or level == 0: 132 self.msgout(4, "determine_parent -> None") 133 return None 134 pname = caller.__name__ 135 if level >= 1: # relative import 136 if caller.__path__: 137 level -= 1 138 if level == 0: 139 parent = self.modules[pname] 140 assert parent is caller 141 self.msgout(4, "determine_parent ->", parent) 142 return parent 143 if pname.count(".") < level: 144 raise ImportError("relative importpath too deep") 145 pname = ".".join(pname.split(".")[:-level]) 146 parent = self.modules[pname] 147 self.msgout(4, "determine_parent ->", parent) 148 return parent 149 if caller.__path__: 150 parent = self.modules[pname] 151 assert caller is parent 152 self.msgout(4, "determine_parent ->", parent) 153 return parent 154 if '.' in pname: 155 i = pname.rfind('.') 156 pname = pname[:i] 157 parent = self.modules[pname] 158 assert parent.__name__ == pname 159 self.msgout(4, "determine_parent ->", parent) 160 return parent 161 self.msgout(4, "determine_parent -> None") 162 return None 163 164 def find_head_package(self, parent, name): 165 self.msgin(4, "find_head_package", parent, name) 166 if '.' in name: 167 i = name.find('.') 168 head = name[:i] 169 tail = name[i+1:] 170 else: 171 head = name 172 tail = "" 173 if parent: 174 qname = "%s.%s" % (parent.__name__, head) 175 else: 176 qname = head 177 q = self.import_module(head, qname, parent) 178 if q: 179 self.msgout(4, "find_head_package ->", (q, tail)) 180 return q, tail 181 if parent: 182 qname = head 183 parent = None 184 q = self.import_module(head, qname, parent) 185 if q: 186 self.msgout(4, "find_head_package ->", (q, tail)) 187 return q, tail 188 self.msgout(4, "raise ImportError: No module named", qname) 189 raise ImportError("No module named " + qname) 190 191 def load_tail(self, q, tail): 192 self.msgin(4, "load_tail", q, tail) 193 m = q 194 while tail: 195 i = tail.find('.') 196 if i < 0: i = len(tail) 197 head, tail = tail[:i], tail[i+1:] 198 mname = "%s.%s" % (m.__name__, head) 199 m = self.import_module(head, mname, m) 200 if not m: 201 self.msgout(4, "raise ImportError: No module named", mname) 202 raise ImportError("No module named " + mname) 203 self.msgout(4, "load_tail ->", m) 204 return m 205 206 def ensure_fromlist(self, m, fromlist, recursive=0): 207 self.msg(4, "ensure_fromlist", m, fromlist, recursive) 208 for sub in fromlist: 209 if sub == "*": 210 if not recursive: 211 all = self.find_all_submodules(m) 212 if all: 213 self.ensure_fromlist(m, all, 1) 214 elif not hasattr(m, sub): 215 subname = "%s.%s" % (m.__name__, sub) 216 submod = self.import_module(sub, subname, m) 217 if not submod: 218 raise ImportError("No module named " + subname) 219 220 def find_all_submodules(self, m): 221 if not m.__path__: 222 return 223 modules = {} 224 # 'suffixes' used to be a list hardcoded to [".py", ".pyc"]. 225 # But we must also collect Python extension modules - although 226 # we cannot separate normal dlls from Python extensions. 227 suffixes = [] 228 suffixes += importlib.machinery.EXTENSION_SUFFIXES[:] 229 suffixes += importlib.machinery.SOURCE_SUFFIXES[:] 230 suffixes += importlib.machinery.BYTECODE_SUFFIXES[:] 231 for dir in m.__path__: 232 try: 233 names = os.listdir(dir) 234 except OSError: 235 self.msg(2, "can't list directory", dir) 236 continue 237 for name in names: 238 mod = None 239 for suff in suffixes: 240 n = len(suff) 241 if name[-n:] == suff: 242 mod = name[:-n] 243 break 244 if mod and mod != "__init__": 245 modules[mod] = mod 246 return modules.keys() 247 248 def import_module(self, partname, fqname, parent): 249 self.msgin(3, "import_module", partname, fqname, parent) 250 try: 251 m = self.modules[fqname] 252 except KeyError: 253 pass 254 else: 255 self.msgout(3, "import_module ->", m) 256 return m 257 if fqname in self.badmodules: 258 self.msgout(3, "import_module -> None") 259 return None 260 if parent and parent.__path__ is None: 261 self.msgout(3, "import_module -> None") 262 return None 263 try: 264 fp, pathname, stuff = self.find_module(partname, 265 parent and parent.__path__, parent) 266 except ImportError: 267 self.msgout(3, "import_module ->", None) 268 return None 269 try: 270 m = self.load_module(fqname, fp, pathname, stuff) 271 finally: 272 if fp: 273 fp.close() 274 if parent: 275 setattr(parent, partname, m) 276 self.msgout(3, "import_module ->", m) 277 return m 278 279 def load_module(self, fqname, fp, pathname, file_info): 280 suffix, mode, type = file_info 281 self.msgin(2, "load_module", fqname, fp and "fp", pathname) 282 if type == imp.PKG_DIRECTORY: 283 m = self.load_package(fqname, pathname) 284 self.msgout(2, "load_module ->", m) 285 return m 286 if type == imp.PY_SOURCE: 287 co = compile(fp.read()+'\n', pathname, 'exec') 288 elif type == imp.PY_COMPILED: 289 try: 290 data = fp.read() 291 importlib._bootstrap_external._classify_pyc(data, fqname, {}) 292 except ImportError as exc: 293 self.msgout(2, "raise ImportError: " + str(exc), pathname) 294 raise 295 co = marshal.loads(memoryview(data)[16:]) 296 else: 297 co = None 298 m = self.add_module(fqname) 299 m.__file__ = pathname 300 if co: 301 if self.replace_paths: 302 co = self.replace_paths_in_code(co) 303 m.__code__ = co 304 self.scan_code(co, m) 305 self.msgout(2, "load_module ->", m) 306 return m 307 308 def _add_badmodule(self, name, caller): 309 if name not in self.badmodules: 310 self.badmodules[name] = {} 311 if caller: 312 self.badmodules[name][caller.__name__] = 1 313 else: 314 self.badmodules[name]["-"] = 1 315 316 def _safe_import_hook(self, name, caller, fromlist, level=-1): 317 # wrapper for self.import_hook() that won't raise ImportError 318 if name in self.badmodules: 319 self._add_badmodule(name, caller) 320 return 321 try: 322 self.import_hook(name, caller, level=level) 323 except ImportError as msg: 324 self.msg(2, "ImportError:", str(msg)) 325 self._add_badmodule(name, caller) 326 else: 327 if fromlist: 328 for sub in fromlist: 329 if sub in self.badmodules: 330 self._add_badmodule(sub, caller) 331 continue 332 try: 333 self.import_hook(name, caller, [sub], level=level) 334 except ImportError as msg: 335 self.msg(2, "ImportError:", str(msg)) 336 fullname = name + "." + sub 337 self._add_badmodule(fullname, caller) 338 339 def scan_opcodes(self, co): 340 # Scan the code, and yield 'interesting' opcode combinations 341 code = co.co_code 342 names = co.co_names 343 consts = co.co_consts 344 opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code) 345 if op != EXTENDED_ARG] 346 for i, (op, oparg) in enumerate(opargs): 347 if op in STORE_OPS: 348 yield "store", (names[oparg],) 349 continue 350 if (op == IMPORT_NAME and i >= 2 351 and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST): 352 level = consts[opargs[i-2][1]] 353 fromlist = consts[opargs[i-1][1]] 354 if level == 0: # absolute import 355 yield "absolute_import", (fromlist, names[oparg]) 356 else: # relative import 357 yield "relative_import", (level, fromlist, names[oparg]) 358 continue 359 360 def scan_code(self, co, m): 361 code = co.co_code 362 scanner = self.scan_opcodes 363 for what, args in scanner(co): 364 if what == "store": 365 name, = args 366 m.globalnames[name] = 1 367 elif what == "absolute_import": 368 fromlist, name = args 369 have_star = 0 370 if fromlist is not None: 371 if "*" in fromlist: 372 have_star = 1 373 fromlist = [f for f in fromlist if f != "*"] 374 self._safe_import_hook(name, m, fromlist, level=0) 375 if have_star: 376 # We've encountered an "import *". If it is a Python module, 377 # the code has already been parsed and we can suck out the 378 # global names. 379 mm = None 380 if m.__path__: 381 # At this point we don't know whether 'name' is a 382 # submodule of 'm' or a global module. Let's just try 383 # the full name first. 384 mm = self.modules.get(m.__name__ + "." + name) 385 if mm is None: 386 mm = self.modules.get(name) 387 if mm is not None: 388 m.globalnames.update(mm.globalnames) 389 m.starimports.update(mm.starimports) 390 if mm.__code__ is None: 391 m.starimports[name] = 1 392 else: 393 m.starimports[name] = 1 394 elif what == "relative_import": 395 level, fromlist, name = args 396 if name: 397 self._safe_import_hook(name, m, fromlist, level=level) 398 else: 399 parent = self.determine_parent(m, level=level) 400 self._safe_import_hook(parent.__name__, None, fromlist, level=0) 401 else: 402 # We don't expect anything else from the generator. 403 raise RuntimeError(what) 404 405 for c in co.co_consts: 406 if isinstance(c, type(co)): 407 self.scan_code(c, m) 408 409 def load_package(self, fqname, pathname): 410 self.msgin(2, "load_package", fqname, pathname) 411 newname = replacePackageMap.get(fqname) 412 if newname: 413 fqname = newname 414 m = self.add_module(fqname) 415 m.__file__ = pathname 416 m.__path__ = [pathname] 417 418 # As per comment at top of file, simulate runtime __path__ additions. 419 m.__path__ = m.__path__ + packagePathMap.get(fqname, []) 420 421 fp, buf, stuff = self.find_module("__init__", m.__path__) 422 try: 423 self.load_module(fqname, fp, buf, stuff) 424 self.msgout(2, "load_package ->", m) 425 return m 426 finally: 427 if fp: 428 fp.close() 429 430 def add_module(self, fqname): 431 if fqname in self.modules: 432 return self.modules[fqname] 433 self.modules[fqname] = m = Module(fqname) 434 return m 435 436 def find_module(self, name, path, parent=None): 437 if parent is not None: 438 # assert path is not None 439 fullname = parent.__name__+'.'+name 440 else: 441 fullname = name 442 if fullname in self.excludes: 443 self.msgout(3, "find_module -> Excluded", fullname) 444 raise ImportError(name) 445 446 if path is None: 447 if name in sys.builtin_module_names: 448 return (None, None, ("", "", imp.C_BUILTIN)) 449 450 path = self.path 451 return imp.find_module(name, path) 452 453 def report(self): 454 """Print a report to stdout, listing the found modules with their 455 paths, as well as modules that are missing, or seem to be missing. 456 """ 457 print() 458 print(" %-25s %s" % ("Name", "File")) 459 print(" %-25s %s" % ("----", "----")) 460 # Print modules found 461 keys = sorted(self.modules.keys()) 462 for key in keys: 463 m = self.modules[key] 464 if m.__path__: 465 print("P", end=' ') 466 else: 467 print("m", end=' ') 468 print("%-25s" % key, m.__file__ or "") 469 470 # Print missing modules 471 missing, maybe = self.any_missing_maybe() 472 if missing: 473 print() 474 print("Missing modules:") 475 for name in missing: 476 mods = sorted(self.badmodules[name].keys()) 477 print("?", name, "imported from", ', '.join(mods)) 478 # Print modules that may be missing, but then again, maybe not... 479 if maybe: 480 print() 481 print("Submodules that appear to be missing, but could also be", end=' ') 482 print("global names in the parent package:") 483 for name in maybe: 484 mods = sorted(self.badmodules[name].keys()) 485 print("?", name, "imported from", ', '.join(mods)) 486 487 def any_missing(self): 488 """Return a list of modules that appear to be missing. Use 489 any_missing_maybe() if you want to know which modules are 490 certain to be missing, and which *may* be missing. 491 """ 492 missing, maybe = self.any_missing_maybe() 493 return missing + maybe 494 495 def any_missing_maybe(self): 496 """Return two lists, one with modules that are certainly missing 497 and one with modules that *may* be missing. The latter names could 498 either be submodules *or* just global names in the package. 499 500 The reason it can't always be determined is that it's impossible to 501 tell which names are imported when "from module import *" is done 502 with an extension module, short of actually importing it. 503 """ 504 missing = [] 505 maybe = [] 506 for name in self.badmodules: 507 if name in self.excludes: 508 continue 509 i = name.rfind(".") 510 if i < 0: 511 missing.append(name) 512 continue 513 subname = name[i+1:] 514 pkgname = name[:i] 515 pkg = self.modules.get(pkgname) 516 if pkg is not None: 517 if pkgname in self.badmodules[name]: 518 # The package tried to import this module itself and 519 # failed. It's definitely missing. 520 missing.append(name) 521 elif subname in pkg.globalnames: 522 # It's a global in the package: definitely not missing. 523 pass 524 elif pkg.starimports: 525 # It could be missing, but the package did an "import *" 526 # from a non-Python module, so we simply can't be sure. 527 maybe.append(name) 528 else: 529 # It's not a global in the package, the package didn't 530 # do funny star imports, it's very likely to be missing. 531 # The symbol could be inserted into the package from the 532 # outside, but since that's not good style we simply list 533 # it missing. 534 missing.append(name) 535 else: 536 missing.append(name) 537 missing.sort() 538 maybe.sort() 539 return missing, maybe 540 541 def replace_paths_in_code(self, co): 542 new_filename = original_filename = os.path.normpath(co.co_filename) 543 for f, r in self.replace_paths: 544 if original_filename.startswith(f): 545 new_filename = r + original_filename[len(f):] 546 break 547 548 if self.debug and original_filename not in self.processed_paths: 549 if new_filename != original_filename: 550 self.msgout(2, "co_filename %r changed to %r" \ 551 % (original_filename,new_filename,)) 552 else: 553 self.msgout(2, "co_filename %r remains unchanged" \ 554 % (original_filename,)) 555 self.processed_paths.append(original_filename) 556 557 consts = list(co.co_consts) 558 for i in range(len(consts)): 559 if isinstance(consts[i], type(co)): 560 consts[i] = self.replace_paths_in_code(consts[i]) 561 562 return types.CodeType(co.co_argcount, co.co_kwonlyargcount, 563 co.co_nlocals, co.co_stacksize, co.co_flags, 564 co.co_code, tuple(consts), co.co_names, 565 co.co_varnames, new_filename, co.co_name, 566 co.co_firstlineno, co.co_lnotab, co.co_freevars, 567 co.co_cellvars) 568 569 570def test(): 571 # Parse command line 572 import getopt 573 try: 574 opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") 575 except getopt.error as msg: 576 print(msg) 577 return 578 579 # Process options 580 debug = 1 581 domods = 0 582 addpath = [] 583 exclude = [] 584 for o, a in opts: 585 if o == '-d': 586 debug = debug + 1 587 if o == '-m': 588 domods = 1 589 if o == '-p': 590 addpath = addpath + a.split(os.pathsep) 591 if o == '-q': 592 debug = 0 593 if o == '-x': 594 exclude.append(a) 595 596 # Provide default arguments 597 if not args: 598 script = "hello.py" 599 else: 600 script = args[0] 601 602 # Set the path based on sys.path and the script directory 603 path = sys.path[:] 604 path[0] = os.path.dirname(script) 605 path = addpath + path 606 if debug > 1: 607 print("path:") 608 for item in path: 609 print(" ", repr(item)) 610 611 # Create the module finder and turn its crank 612 mf = ModuleFinder(path, debug, exclude) 613 for arg in args[1:]: 614 if arg == '-m': 615 domods = 1 616 continue 617 if domods: 618 if arg[-2:] == '.*': 619 mf.import_hook(arg[:-2], None, ["*"]) 620 else: 621 mf.import_hook(arg) 622 else: 623 mf.load_file(arg) 624 mf.run_script(script) 625 mf.report() 626 return mf # for -i debugging 627 628 629if __name__ == '__main__': 630 try: 631 mf = test() 632 except KeyboardInterrupt: 633 print("\n[interrupted]") 634