"""Parse a Python module and describe its classes and functions.

Parse enough of a Python file to recognize imports and class and
function definitions, and to find out the superclasses of a class.

The interface consists of a single function:
    readmodule_ex(module, path=None)
where module is the name of a Python module, and path is an optional
list of directories where the module is to be searched.  If present,
path is prepended to the system search path sys.path.  The return value
is a dictionary.  The keys of the dictionary are the names of the
classes and functions defined in the module (including classes that are
defined via the from XXX import YYY construct).  The values are
instances of classes Class and Function.  One special key/value pair is
present for packages: the key '__path__' has a list as its value which
contains the package search path.

Classes and Functions have a common superclass: _Object.  Every instance
has the following attributes:
    module  -- name of the module;
    name    -- name of the object;
    file    -- file in which the object is defined;
    lineno  -- line in the file where the object's definition starts;
    parent  -- parent of this object, if any;
    children -- nested objects contained in this object.
The 'children' attribute is a dictionary mapping names to objects.

Instances of Function describe functions with the attributes from _Object.

Instances of Class describe classes with the attributes from _Object,
plus the following:
    super   -- list of super classes (Class instances if possible);
    methods -- mapping of method names to beginning line numbers.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.
"""

import io
import sys
import importlib.util
import tokenize
from token import NAME, DEDENT, OP

__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

_modules = {}  # Initialize cache of modules we've seen.


class _Object:
    "Information about Python class or function."
    def __init__(self, module, name, file, lineno, parent):
        self.module = module
        self.name = name
        self.file = file
        self.lineno = lineno
        self.parent = parent
        self.children = {}

    def _addchild(self, name, obj):
        "Record obj as a nested object of this one under name."
        self.children[name] = obj


class Function(_Object):
    "Information about a Python function, including methods."
    def __init__(self, module, name, file, lineno, parent=None):
        _Object.__init__(self, module, name, file, lineno, parent)


class Class(_Object):
    "Information about a Python class."
    def __init__(self, module, name, super, file, lineno, parent=None):
        # The parameter named 'super' shadows the builtin here, so the
        # base initializer has to be called explicitly.
        _Object.__init__(self, module, name, file, lineno, parent)
        self.super = [] if super is None else super
        self.methods = {}

    def _addmethod(self, name, lineno):
        "Record the starting line number of method name."
        self.methods[name] = lineno


def _nest_function(ob, func_name, lineno):
    "Return a Function after nesting within ob."
    newfunc = Function(ob.module, func_name, ob.file, lineno, ob)
    ob._addchild(func_name, newfunc)
    if isinstance(ob, Class):
        # Functions directly inside a class are also listed as methods.
        ob._addmethod(func_name, lineno)
    return newfunc


def _nest_class(ob, class_name, lineno, super=None):
    "Return a Class after nesting within ob."
    newclass = Class(ob.module, class_name, super, ob.file, lineno, ob)
    ob._addchild(class_name, newclass)
    return newclass


def readmodule(module, path=None):
    """Return Class objects for the top-level classes in module.

    This is the original interface, before Functions were added.
    """
    return {key: value
            for key, value in _readmodule(module, path or []).items()
            if isinstance(value, Class)}


def readmodule_ex(module, path=None):
    """Return a dictionary with all functions and classes in module.

    Search for module in PATH + sys.path.
    If possible, include imported superclasses.
    Do this by reading source, without importing (and executing) it.
    """
    return _readmodule(module, path or [])


def _readmodule(module, path, inpackage=None):
    """Do the hard work for readmodule[_ex].

    If inpackage is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and path is combined with sys.path.

    Return the (cached) dictionary describing the module.  Raise
    ImportError if a parent in a dotted name is not a package, and
    ModuleNotFoundError if no spec can be found for the module.
    """
    # Compute the full module name (prepending inpackage if set).
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache.
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents.
    tree = {}

    # Check if it is a built-in module; we don't do much for these.
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name.
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        if '__path__' not in parent:
            raise ImportError('No package named {}'.format(package))
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module.
    if inpackage is not None:
        search_path = path
    else:
        search_path = path + sys.path
    spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    if spec is None:
        raise ModuleNotFoundError(f"no module named {fullmodule!r}", name=fullmodule)
    # Cache before parsing so that circular imports terminate.
    _modules[fullmodule] = tree
    # Is module a package?
    if spec.submodule_search_locations is not None:
        tree['__path__'] = spec.submodule_search_locations
    try:
        source = spec.loader.get_source(fullmodule)
    except (AttributeError, ImportError):
        # If module is not Python source, we cannot do anything.
        return tree
    else:
        if source is None:
            return tree

    fname = spec.loader.get_filename(fullmodule)
    return _create_tree(fullmodule, path, fname, source, tree, inpackage)


def _close_scopes(stack, indent):
    "Pop the (object, indent) pairs in stack that end at column indent."
    while stack and stack[-1][1] >= indent:
        del stack[-1]


def _resolve_base(name, tree):
    """Return the Class known under the dotted name, or name itself.

    A name found in tree is resolved directly.  For module.class forms
    the last two components are looked up in the module cache.
    """
    if name in tree:
        # We know this super class.
        return tree[name]
    parts = name.split('.')
    if len(parts) > 1:
        # Super class form is module.class: look in module for class.
        modname, classname = parts[-2], parts[-1]
        if modname in _modules:
            contents = _modules[modname]
            if classname in contents:
                return contents[classname]
    return name


def _read_bases(g, tree):
    """Return the list of superclasses read from token source g.

    g must be positioned just after the opening parenthesis of a class
    statement; tokens are consumed up to the matching parenthesis.
    Expressions in the base list are not supported: only NAME and OP
    tokens at the outermost nesting level contribute to a name.
    """
    names = []          # Resolved superclasses.
    level = 1           # Parenthesis nesting depth.
    base_tokens = []    # Tokens making up the current superclass.
    while True:
        tokentype, token = next(g)[0:2]
        if token in (')', ',') and level == 1:
            name = "".join(base_tokens)
            # An empty list '()' or a trailing comma yields no name.
            if name:
                names.append(_resolve_base(name, tree))
            base_tokens = []
        if token == '(':
            level += 1
        elif token == ')':
            level -= 1
            if level == 0:
                break
        elif token == ',' and level == 1:
            pass
        # Only use NAME and OP (== dot) tokens for type name.
        elif tokentype in (NAME, OP) and level == 1:
            base_tokens.append(token)
    return names


def _create_tree(fullmodule, path, fname, source, tree, inpackage):
    """Return the tree for a particular module.

    fullmodule (full module name), inpackage+module, becomes o.module.
    path is passed to recursive calls of _readmodule.
    fname becomes o.file.
    source is tokenized.  Imports cause recursive calls to _readmodule.
    tree is {} or {'__path__': <submodule search locations>}.
    inpackage, None or string, is passed to recursive calls of _readmodule.

    The effect of recursive calls is mutation of global _modules.
    """
    stack = []  # Initialize stack of (class, indent) pairs.
    # Position of an 'async' keyword seen as the previous token, if any.
    async_start = None

    with io.StringIO(source) as f:
        g = tokenize.generate_tokens(f.readline)
        try:
            for tokentype, token, start, _end, _line in g:
                if token == 'async':
                    async_start = start
                    continue
                pending_async, async_start = async_start, None

                if tokentype == DEDENT:
                    lineno, thisindent = start
                    # Close previous nested classes and defs.
                    _close_scopes(stack, thisindent)
                elif token == 'def':
                    lineno, thisindent = start
                    if pending_async is not None and pending_async[0] == lineno:
                        # For 'async def' the statement is indented at
                        # the 'async' keyword, not at 'def'.
                        thisindent = pending_async[1]
                    # Close previous nested classes and defs.
                    _close_scopes(stack, thisindent)
                    tokentype, func_name = next(g)[0:2]
                    if tokentype != NAME:
                        continue  # Skip def with syntax error.
                    if stack:
                        cur_obj = stack[-1][0]
                        cur_func = _nest_function(cur_obj, func_name, lineno)
                    else:
                        # It is just a function.
                        cur_func = Function(fullmodule, func_name, fname, lineno)
                        tree[func_name] = cur_func
                    stack.append((cur_func, thisindent))
                elif token == 'class':
                    lineno, thisindent = start
                    # Close previous nested classes and defs.
                    _close_scopes(stack, thisindent)
                    tokentype, class_name = next(g)[0:2]
                    if tokentype != NAME:
                        continue  # Skip class with syntax error.
                    # Parse what follows the class name.
                    token = next(g)[1]
                    inherit = _read_bases(g, tree) if token == '(' else None
                    if stack:
                        cur_obj = stack[-1][0]
                        cur_class = _nest_class(
                                cur_obj, class_name, lineno, inherit)
                    else:
                        cur_class = Class(fullmodule, class_name, inherit,
                                          fname, lineno)
                        tree[class_name] = cur_class
                    stack.append((cur_class, thisindent))
                elif token == 'import' and start[1] == 0:
                    modules = _getnamelist(g)
                    for mod, _mod2 in modules:
                        try:
                            # Recursively read the imported module.
                            if inpackage is None:
                                _readmodule(mod, path)
                            else:
                                try:
                                    _readmodule(mod, path, inpackage)
                                except ImportError:
                                    _readmodule(mod, [])
                        except Exception:
                            # If we can't find or parse the imported module,
                            # too bad -- don't die here.
                            pass
                elif token == 'from' and start[1] == 0:
                    mod, token = _getname(g)
                    if not mod or token != "import":
                        continue
                    names = _getnamelist(g)
                    try:
                        # Recursively read the imported module.
                        d = _readmodule(mod, path, inpackage)
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.
                        continue
                    # Add any classes that were defined in the imported module
                    # to our name space if they were mentioned in the list.
                    for n, n2 in names:
                        if n in d:
                            tree[n2 or n] = d[n]
                        elif n == '*':
                            # Don't add names that start with _.
                            for n in d:
                                if n[0] != '_':
                                    tree[n] = d[n]
        except StopIteration:
            # The token stream ended in the middle of a statement.
            pass

    return tree


def _getnamelist(g):
    """Return list of (dotted-name, as-name or None) tuples for token source g.

    An as-name is the name that follows 'as' in an as clause.
    """
    names = []
    while True:
        name, token = _getname(g)
        if not name:
            break
        if token == 'as':
            name2, token = _getname(g)
        else:
            name2 = None
        names.append((name, name2))
        # Skip ahead to the next comma or to the end of the line.
        while token != "," and "\n" not in token:
            token = next(g)[1]
        if token != ",":
            break
    return names


def _getname(g):
    "Return (dotted-name or None, next-token) tuple for token source g."
    parts = []
    tokentype, token = next(g)[0:2]
    if tokentype != NAME and token != '*':
        return (None, token)
    parts.append(token)
    while True:
        tokentype, token = next(g)[0:2]
        if token != '.':
            break
        tokentype, token = next(g)[0:2]
        if tokentype != NAME:
            break
        parts.append(token)
    return (".".join(parts), token)


def _main():
    "Print module output (default this file) for quick visual check."
    import os
    try:
        mod = sys.argv[1]
    except IndexError:
        mod = __file__
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)
    lineno_key = lambda a: getattr(a, 'lineno', 0)
    # Sorted in reverse so that pop() yields objects in line order.
    objs = sorted(tree.values(), key=lineno_key, reverse=True)
    indent_level = 2
    while objs:
        obj = objs.pop()
        if isinstance(obj, list):
            # Value is a __path__ key.
            continue
        if not hasattr(obj, 'indent'):
            obj.indent = 0

        if isinstance(obj, _Object):
            new_objs = sorted(obj.children.values(),
                              key=lineno_key, reverse=True)
            for ob in new_objs:
                ob.indent = obj.indent + indent_level
            objs.extend(new_objs)
        if isinstance(obj, Class):
            print("{}class {} {} {}"
                  .format(' ' * obj.indent, obj.name, obj.super, obj.lineno))
        elif isinstance(obj, Function):
            print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno))


if __name__ == "__main__":
    _main()