1"""Find modules used by a script, using introspection.""" 2 3from __future__ import generators 4import dis 5import imp 6import marshal 7import os 8import sys 9import types 10import struct 11 12if hasattr(sys.__stdout__, "newlines"): 13 READ_MODE = "U" # universal line endings 14else: 15 # Python < 2.3 compatibility, no longer strictly required 16 READ_MODE = "r" 17 18LOAD_CONST = dis.opmap['LOAD_CONST'] 19IMPORT_NAME = dis.opmap['IMPORT_NAME'] 20STORE_NAME = dis.opmap['STORE_NAME'] 21STORE_GLOBAL = dis.opmap['STORE_GLOBAL'] 22STORE_OPS = STORE_NAME, STORE_GLOBAL 23HAVE_ARGUMENT = dis.HAVE_ARGUMENT 24EXTENDED_ARG = dis.EXTENDED_ARG 25 26def _unpack_opargs(code): 27 # enumerate() is not an option, since we sometimes process 28 # multiple elements on a single pass through the loop 29 extended_arg = 0 30 n = len(code) 31 i = 0 32 while i < n: 33 op = ord(code[i]) 34 offset = i 35 i = i+1 36 arg = None 37 if op >= HAVE_ARGUMENT: 38 arg = ord(code[i]) + ord(code[i+1])*256 + extended_arg 39 extended_arg = 0 40 i = i+2 41 if op == EXTENDED_ARG: 42 extended_arg = arg*65536 43 yield (offset, op, arg) 44 45# Modulefinder does a good job at simulating Python's, but it can not 46# handle __path__ modifications packages make at runtime. Therefore there 47# is a mechanism whereby you can register extra paths in this map for a 48# package, and it will be honored. 49 50# Note this is a mapping is lists of paths. 51packagePathMap = {} 52 53# A Public interface 54def AddPackagePath(packagename, path): 55 paths = packagePathMap.get(packagename, []) 56 paths.append(path) 57 packagePathMap[packagename] = paths 58 59replacePackageMap = {} 60 61# This ReplacePackage mechanism allows modulefinder to work around the 62# way the _xmlplus package injects itself under the name "xml" into 63# sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml") 64# before running ModuleFinder. 65 66def ReplacePackage(oldname, newname): 67 replacePackageMap[oldname] = newname 68 69 70class Module: 71 72 def __init__(self, name, file=None, path=None): 73 self.__name__ = name 74 self.__file__ = file 75 self.__path__ = path 76 self.__code__ = None 77 # The set of global names that are assigned to in the module. 78 # This includes those names imported through starimports of 79 # Python modules. 80 self.globalnames = {} 81 # The set of starimports this module did that could not be 82 # resolved, ie. a starimport from a non-Python module. 83 self.starimports = {} 84 85 def __repr__(self): 86 s = "Module(%r" % (self.__name__,) 87 if self.__file__ is not None: 88 s = s + ", %r" % (self.__file__,) 89 if self.__path__ is not None: 90 s = s + ", %r" % (self.__path__,) 91 s = s + ")" 92 return s 93 94class ModuleFinder: 95 96 def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]): 97 if path is None: 98 path = sys.path 99 self.path = path 100 self.modules = {} 101 self.badmodules = {} 102 self.debug = debug 103 self.indent = 0 104 self.excludes = excludes 105 self.replace_paths = replace_paths 106 self.processed_paths = [] # Used in debugging only 107 108 def msg(self, level, str, *args): 109 if level <= self.debug: 110 for i in range(self.indent): 111 print " ", 112 print str, 113 for arg in args: 114 print repr(arg), 115 print 116 117 def msgin(self, *args): 118 level = args[0] 119 if level <= self.debug: 120 self.indent = self.indent + 1 121 self.msg(*args) 122 123 def msgout(self, *args): 124 level = args[0] 125 if level <= self.debug: 126 self.indent = self.indent - 1 127 self.msg(*args) 128 129 def run_script(self, pathname): 130 self.msg(2, "run_script", pathname) 131 with open(pathname, READ_MODE) as fp: 132 stuff = ("", "r", imp.PY_SOURCE) 133 self.load_module('__main__', fp, pathname, stuff) 134 135 def load_file(self, pathname): 136 dir, name = os.path.split(pathname) 137 name, ext = os.path.splitext(name) 138 with open(pathname, READ_MODE) as fp: 139 stuff = (ext, "r", imp.PY_SOURCE) 140 self.load_module(name, fp, pathname, stuff) 141 142 def import_hook(self, name, caller=None, fromlist=None, level=-1): 143 self.msg(3, "import_hook", name, caller, fromlist, level) 144 parent = self.determine_parent(caller, level=level) 145 q, tail = self.find_head_package(parent, name) 146 m = self.load_tail(q, tail) 147 if not fromlist: 148 return q 149 if m.__path__: 150 self.ensure_fromlist(m, fromlist) 151 return None 152 153 def determine_parent(self, caller, level=-1): 154 self.msgin(4, "determine_parent", caller, level) 155 if not caller or level == 0: 156 self.msgout(4, "determine_parent -> None") 157 return None 158 pname = caller.__name__ 159 if level >= 1: # relative import 160 if caller.__path__: 161 level -= 1 162 if level == 0: 163 parent = self.modules[pname] 164 assert parent is caller 165 self.msgout(4, "determine_parent ->", parent) 166 return parent 167 if pname.count(".") < level: 168 raise ImportError, "relative importpath too deep" 169 pname = ".".join(pname.split(".")[:-level]) 170 parent = self.modules[pname] 171 self.msgout(4, "determine_parent ->", parent) 172 return parent 173 if caller.__path__: 174 parent = self.modules[pname] 175 assert caller is parent 176 self.msgout(4, "determine_parent ->", parent) 177 return parent 178 if '.' in pname: 179 i = pname.rfind('.') 180 pname = pname[:i] 181 parent = self.modules[pname] 182 assert parent.__name__ == pname 183 self.msgout(4, "determine_parent ->", parent) 184 return parent 185 self.msgout(4, "determine_parent -> None") 186 return None 187 188 def find_head_package(self, parent, name): 189 self.msgin(4, "find_head_package", parent, name) 190 if '.' in name: 191 i = name.find('.') 192 head = name[:i] 193 tail = name[i+1:] 194 else: 195 head = name 196 tail = "" 197 if parent: 198 qname = "%s.%s" % (parent.__name__, head) 199 else: 200 qname = head 201 q = self.import_module(head, qname, parent) 202 if q: 203 self.msgout(4, "find_head_package ->", (q, tail)) 204 return q, tail 205 if parent: 206 qname = head 207 parent = None 208 q = self.import_module(head, qname, parent) 209 if q: 210 self.msgout(4, "find_head_package ->", (q, tail)) 211 return q, tail 212 self.msgout(4, "raise ImportError: No module named", qname) 213 raise ImportError, "No module named " + qname 214 215 def load_tail(self, q, tail): 216 self.msgin(4, "load_tail", q, tail) 217 m = q 218 while tail: 219 i = tail.find('.') 220 if i < 0: i = len(tail) 221 head, tail = tail[:i], tail[i+1:] 222 mname = "%s.%s" % (m.__name__, head) 223 m = self.import_module(head, mname, m) 224 if not m: 225 self.msgout(4, "raise ImportError: No module named", mname) 226 raise ImportError, "No module named " + mname 227 self.msgout(4, "load_tail ->", m) 228 return m 229 230 def ensure_fromlist(self, m, fromlist, recursive=0): 231 self.msg(4, "ensure_fromlist", m, fromlist, recursive) 232 for sub in fromlist: 233 if sub == "*": 234 if not recursive: 235 all = self.find_all_submodules(m) 236 if all: 237 self.ensure_fromlist(m, all, 1) 238 elif not hasattr(m, sub): 239 subname = "%s.%s" % (m.__name__, sub) 240 submod = self.import_module(sub, subname, m) 241 if not submod: 242 raise ImportError, "No module named " + subname 243 244 def find_all_submodules(self, m): 245 if not m.__path__: 246 return 247 modules = {} 248 # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"]. 249 # But we must also collect Python extension modules - although 250 # we cannot separate normal dlls from Python extensions. 251 suffixes = [] 252 for triple in imp.get_suffixes(): 253 suffixes.append(triple[0]) 254 for dir in m.__path__: 255 try: 256 names = os.listdir(dir) 257 except os.error: 258 self.msg(2, "can't list directory", dir) 259 continue 260 for name in names: 261 mod = None 262 for suff in suffixes: 263 n = len(suff) 264 if name[-n:] == suff: 265 mod = name[:-n] 266 break 267 if mod and mod != "__init__": 268 modules[mod] = mod 269 return modules.keys() 270 271 def import_module(self, partname, fqname, parent): 272 self.msgin(3, "import_module", partname, fqname, parent) 273 try: 274 m = self.modules[fqname] 275 except KeyError: 276 pass 277 else: 278 self.msgout(3, "import_module ->", m) 279 return m 280 if fqname in self.badmodules: 281 self.msgout(3, "import_module -> None") 282 return None 283 if parent and parent.__path__ is None: 284 self.msgout(3, "import_module -> None") 285 return None 286 try: 287 fp, pathname, stuff = self.find_module(partname, 288 parent and parent.__path__, parent) 289 except ImportError: 290 self.msgout(3, "import_module ->", None) 291 return None 292 try: 293 m = self.load_module(fqname, fp, pathname, stuff) 294 finally: 295 if fp: fp.close() 296 if parent: 297 setattr(parent, partname, m) 298 self.msgout(3, "import_module ->", m) 299 return m 300 301 def load_module(self, fqname, fp, pathname, file_info): 302 suffix, mode, type = file_info 303 self.msgin(2, "load_module", fqname, fp and "fp", pathname) 304 if type == imp.PKG_DIRECTORY: 305 m = self.load_package(fqname, pathname) 306 self.msgout(2, "load_module ->", m) 307 return m 308 if type == imp.PY_SOURCE: 309 co = compile(fp.read()+'\n', pathname, 'exec') 310 elif type == imp.PY_COMPILED: 311 if fp.read(4) != imp.get_magic(): 312 self.msgout(2, "raise ImportError: Bad magic number", pathname) 313 raise ImportError, "Bad magic number in %s" % pathname 314 fp.read(4) 315 co = marshal.load(fp) 316 else: 317 co = None 318 m = self.add_module(fqname) 319 m.__file__ = pathname 320 if co: 321 if self.replace_paths: 322 co = self.replace_paths_in_code(co) 323 m.__code__ = co 324 self.scan_code(co, m) 325 self.msgout(2, "load_module ->", m) 326 return m 327 328 def _add_badmodule(self, name, caller): 329 if name not in self.badmodules: 330 self.badmodules[name] = {} 331 if caller: 332 self.badmodules[name][caller.__name__] = 1 333 else: 334 self.badmodules[name]["-"] = 1 335 336 def _safe_import_hook(self, name, caller, fromlist, level=-1): 337 # wrapper for self.import_hook() that won't raise ImportError 338 if name in self.badmodules: 339 self._add_badmodule(name, caller) 340 return 341 try: 342 self.import_hook(name, caller, level=level) 343 except ImportError, msg: 344 self.msg(2, "ImportError:", str(msg)) 345 self._add_badmodule(name, caller) 346 else: 347 if fromlist: 348 for sub in fromlist: 349 if sub in self.badmodules: 350 self._add_badmodule(sub, caller) 351 continue 352 try: 353 self.import_hook(name, caller, [sub], level=level) 354 except ImportError, msg: 355 self.msg(2, "ImportError:", str(msg)) 356 fullname = name + "." + sub 357 self._add_badmodule(fullname, caller) 358 359 def scan_opcodes(self, co, 360 unpack = struct.unpack): 361 # Scan the code, and yield 'interesting' opcode combinations 362 # Version for Python 2.4 and older 363 code = co.co_code 364 names = co.co_names 365 consts = co.co_consts 366 opargs = [(op, arg) for _, op, arg in _unpack_opargs(code) 367 if op != EXTENDED_ARG] 368 for i, (op, oparg) in enumerate(opargs): 369 if c in STORE_OPS: 370 yield "store", (names[oparg],) 371 continue 372 if (op == IMPORT_NAME and i >= 1 373 and opargs[i-1][0] == LOAD_CONST): 374 fromlist = consts[opargs[i-1][1]] 375 yield "import", (fromlist, names[oparg]) 376 continue 377 378 def scan_opcodes_25(self, co): 379 # Scan the code, and yield 'interesting' opcode combinations 380 code = co.co_code 381 names = co.co_names 382 consts = co.co_consts 383 opargs = [(op, arg) for _, op, arg in _unpack_opargs(code) 384 if op != EXTENDED_ARG] 385 for i, (op, oparg) in enumerate(opargs): 386 if op in STORE_OPS: 387 yield "store", (names[oparg],) 388 continue 389 if (op == IMPORT_NAME and i >= 2 390 and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST): 391 level = consts[opargs[i-2][1]] 392 fromlist = consts[opargs[i-1][1]] 393 if level == -1: # normal import 394 yield "import", (fromlist, names[oparg]) 395 elif level == 0: # absolute import 396 yield "absolute_import", (fromlist, names[oparg]) 397 else: # relative import 398 yield "relative_import", (level, fromlist, names[oparg]) 399 continue 400 401 def scan_code(self, co, m): 402 code = co.co_code 403 if sys.version_info >= (2, 5): 404 scanner = self.scan_opcodes_25 405 else: 406 scanner = self.scan_opcodes 407 for what, args in scanner(co): 408 if what == "store": 409 name, = args 410 m.globalnames[name] = 1 411 elif what in ("import", "absolute_import"): 412 fromlist, name = args 413 have_star = 0 414 if fromlist is not None: 415 if "*" in fromlist: 416 have_star = 1 417 fromlist = [f for f in fromlist if f != "*"] 418 if what == "absolute_import": level = 0 419 else: level = -1 420 self._safe_import_hook(name, m, fromlist, level=level) 421 if have_star: 422 # We've encountered an "import *". If it is a Python module, 423 # the code has already been parsed and we can suck out the 424 # global names. 425 mm = None 426 if m.__path__: 427 # At this point we don't know whether 'name' is a 428 # submodule of 'm' or a global module. Let's just try 429 # the full name first. 430 mm = self.modules.get(m.__name__ + "." + name) 431 if mm is None: 432 mm = self.modules.get(name) 433 if mm is not None: 434 m.globalnames.update(mm.globalnames) 435 m.starimports.update(mm.starimports) 436 if mm.__code__ is None: 437 m.starimports[name] = 1 438 else: 439 m.starimports[name] = 1 440 elif what == "relative_import": 441 level, fromlist, name = args 442 if name: 443 self._safe_import_hook(name, m, fromlist, level=level) 444 else: 445 parent = self.determine_parent(m, level=level) 446 self._safe_import_hook(parent.__name__, None, fromlist, level=0) 447 else: 448 # We don't expect anything else from the generator. 449 raise RuntimeError(what) 450 451 for c in co.co_consts: 452 if isinstance(c, type(co)): 453 self.scan_code(c, m) 454 455 def load_package(self, fqname, pathname): 456 self.msgin(2, "load_package", fqname, pathname) 457 newname = replacePackageMap.get(fqname) 458 if newname: 459 fqname = newname 460 m = self.add_module(fqname) 461 m.__file__ = pathname 462 m.__path__ = [pathname] 463 464 # As per comment at top of file, simulate runtime __path__ additions. 465 m.__path__ = m.__path__ + packagePathMap.get(fqname, []) 466 467 fp, buf, stuff = self.find_module("__init__", m.__path__) 468 self.load_module(fqname, fp, buf, stuff) 469 self.msgout(2, "load_package ->", m) 470 if fp: 471 fp.close() 472 return m 473 474 def add_module(self, fqname): 475 if fqname in self.modules: 476 return self.modules[fqname] 477 self.modules[fqname] = m = Module(fqname) 478 return m 479 480 def find_module(self, name, path, parent=None): 481 if parent is not None: 482 # assert path is not None 483 fullname = parent.__name__+'.'+name 484 else: 485 fullname = name 486 if fullname in self.excludes: 487 self.msgout(3, "find_module -> Excluded", fullname) 488 raise ImportError, name 489 490 if path is None: 491 if name in sys.builtin_module_names: 492 return (None, None, ("", "", imp.C_BUILTIN)) 493 494 path = self.path 495 return imp.find_module(name, path) 496 497 def report(self): 498 """Print a report to stdout, listing the found modules with their 499 paths, as well as modules that are missing, or seem to be missing. 500 """ 501 print 502 print " %-25s %s" % ("Name", "File") 503 print " %-25s %s" % ("----", "----") 504 # Print modules found 505 keys = self.modules.keys() 506 keys.sort() 507 for key in keys: 508 m = self.modules[key] 509 if m.__path__: 510 print "P", 511 else: 512 print "m", 513 print "%-25s" % key, m.__file__ or "" 514 515 # Print missing modules 516 missing, maybe = self.any_missing_maybe() 517 if missing: 518 print 519 print "Missing modules:" 520 for name in missing: 521 mods = self.badmodules[name].keys() 522 mods.sort() 523 print "?", name, "imported from", ', '.join(mods) 524 # Print modules that may be missing, but then again, maybe not... 525 if maybe: 526 print 527 print "Submodules that appear to be missing, but could also be", 528 print "global names in the parent package:" 529 for name in maybe: 530 mods = self.badmodules[name].keys() 531 mods.sort() 532 print "?", name, "imported from", ', '.join(mods) 533 534 def any_missing(self): 535 """Return a list of modules that appear to be missing. Use 536 any_missing_maybe() if you want to know which modules are 537 certain to be missing, and which *may* be missing. 538 """ 539 missing, maybe = self.any_missing_maybe() 540 return missing + maybe 541 542 def any_missing_maybe(self): 543 """Return two lists, one with modules that are certainly missing 544 and one with modules that *may* be missing. The latter names could 545 either be submodules *or* just global names in the package. 546 547 The reason it can't always be determined is that it's impossible to 548 tell which names are imported when "from module import *" is done 549 with an extension module, short of actually importing it. 550 """ 551 missing = [] 552 maybe = [] 553 for name in self.badmodules: 554 if name in self.excludes: 555 continue 556 i = name.rfind(".") 557 if i < 0: 558 missing.append(name) 559 continue 560 subname = name[i+1:] 561 pkgname = name[:i] 562 pkg = self.modules.get(pkgname) 563 if pkg is not None: 564 if pkgname in self.badmodules[name]: 565 # The package tried to import this module itself and 566 # failed. It's definitely missing. 567 missing.append(name) 568 elif subname in pkg.globalnames: 569 # It's a global in the package: definitely not missing. 570 pass 571 elif pkg.starimports: 572 # It could be missing, but the package did an "import *" 573 # from a non-Python module, so we simply can't be sure. 574 maybe.append(name) 575 else: 576 # It's not a global in the package, the package didn't 577 # do funny star imports, it's very likely to be missing. 578 # The symbol could be inserted into the package from the 579 # outside, but since that's not good style we simply list 580 # it missing. 581 missing.append(name) 582 else: 583 missing.append(name) 584 missing.sort() 585 maybe.sort() 586 return missing, maybe 587 588 def replace_paths_in_code(self, co): 589 new_filename = original_filename = os.path.normpath(co.co_filename) 590 for f, r in self.replace_paths: 591 if original_filename.startswith(f): 592 new_filename = r + original_filename[len(f):] 593 break 594 595 if self.debug and original_filename not in self.processed_paths: 596 if new_filename != original_filename: 597 self.msgout(2, "co_filename %r changed to %r" \ 598 % (original_filename,new_filename,)) 599 else: 600 self.msgout(2, "co_filename %r remains unchanged" \ 601 % (original_filename,)) 602 self.processed_paths.append(original_filename) 603 604 consts = list(co.co_consts) 605 for i in range(len(consts)): 606 if isinstance(consts[i], type(co)): 607 consts[i] = self.replace_paths_in_code(consts[i]) 608 609 return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize, 610 co.co_flags, co.co_code, tuple(consts), co.co_names, 611 co.co_varnames, new_filename, co.co_name, 612 co.co_firstlineno, co.co_lnotab, 613 co.co_freevars, co.co_cellvars) 614 615 616def test(): 617 # Parse command line 618 import getopt 619 try: 620 opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") 621 except getopt.error, msg: 622 print msg 623 return 624 625 # Process options 626 debug = 1 627 domods = 0 628 addpath = [] 629 exclude = [] 630 for o, a in opts: 631 if o == '-d': 632 debug = debug + 1 633 if o == '-m': 634 domods = 1 635 if o == '-p': 636 addpath = addpath + a.split(os.pathsep) 637 if o == '-q': 638 debug = 0 639 if o == '-x': 640 exclude.append(a) 641 642 # Provide default arguments 643 if not args: 644 script = "hello.py" 645 else: 646 script = args[0] 647 648 # Set the path based on sys.path and the script directory 649 path = sys.path[:] 650 path[0] = os.path.dirname(script) 651 path = addpath + path 652 if debug > 1: 653 print "path:" 654 for item in path: 655 print " ", repr(item) 656 657 # Create the module finder and turn its crank 658 mf = ModuleFinder(path, debug, exclude) 659 for arg in args[1:]: 660 if arg == '-m': 661 domods = 1 662 continue 663 if domods: 664 if arg[-2:] == '.*': 665 mf.import_hook(arg[:-2], None, ["*"]) 666 else: 667 mf.import_hook(arg) 668 else: 669 mf.load_file(arg) 670 mf.run_script(script) 671 mf.report() 672 return mf # for -i debugging 673 674 675if __name__ == '__main__': 676 try: 677 mf = test() 678 except KeyboardInterrupt: 679 print "\n[interrupt]" 680