"""Parse a Python module and describe its classes and functions.

Parse enough of a Python file to recognize imports and class and
function definitions, and to find out the superclasses of a class.

The main interface is a single function:
    readmodule_ex(module, path=None)
where module is the name of a Python module, and path is an optional
list of directories where the module is to be searched.  If present,
path is prepended to the system search path sys.path.  The return value
is a dictionary.  The keys of the dictionary are the names of the
classes and functions defined in the module (including classes that are
defined via the from XXX import YYY construct).  The values are
instances of classes Class and Function.  One special key/value pair is
present for packages: the key '__path__' has a list as its value which
contains the package search path.

The older function readmodule(module, path=None) is also provided; it
returns only the entries that are Class instances.

Classes and Functions have a common superclass: _Object.  Every instance
has the following attributes:
    module  -- name of the module;
    name    -- name of the object;
    file    -- file in which the object is defined;
    lineno  -- line in the file where the object's definition starts;
    parent  -- parent of this object, if any;
    children -- nested objects contained in this object.
The 'children' attribute is a dictionary mapping names to objects.

Instances of Function describe functions with the attributes from _Object.

Instances of Class describe classes with the attributes from _Object,
plus the following:
    super   -- list of super classes (Class instances if possible);
    methods -- mapping of method names to beginning line numbers.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.
"""

import io
import sys
import importlib.util
import tokenize
from token import NAME, DEDENT, OP

__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

_modules = {}  # Initialize cache of modules we've seen.


class _Object:
    "Information about Python class or function."
    def __init__(self, module, name, file, lineno, parent):
        self.module = module
        self.name = name
        self.file = file
        self.lineno = lineno
        self.parent = parent
        self.children = {}

    def _addchild(self, name, obj):
        "Register obj as a nested object of this one under name."
        self.children[name] = obj


class Function(_Object):
    "Information about a Python function, including methods."
    def __init__(self, module, name, file, lineno, parent=None):
        _Object.__init__(self, module, name, file, lineno, parent)


class Class(_Object):
    "Information about a Python class."
    def __init__(self, module, name, super, file, lineno, parent=None):
        _Object.__init__(self, module, name, file, lineno, parent)
        # A missing base list is normalized to an empty list.
        self.super = [] if super is None else super
        self.methods = {}

    def _addmethod(self, name, lineno):
        "Record that method name starts at line lineno."
        self.methods[name] = lineno


def _nest_function(ob, func_name, lineno):
    "Return a Function after nesting within ob."
    newfunc = Function(ob.module, func_name, ob.file, lineno, ob)
    ob._addchild(func_name, newfunc)
    if isinstance(ob, Class):
        # Functions directly inside a class are also listed as methods.
        ob._addmethod(func_name, lineno)
    return newfunc


def _nest_class(ob, class_name, lineno, super=None):
    "Return a Class after nesting within ob."
    newclass = Class(ob.module, class_name, super, ob.file, lineno, ob)
    ob._addchild(class_name, newclass)
    return newclass


def readmodule(module, path=None):
    """Return Class objects for the top-level classes in module.

    This is the original interface, before Functions were added.
    """

    res = {}
    for key, value in _readmodule(module, path or []).items():
        if isinstance(value, Class):
            res[key] = value
    return res


def readmodule_ex(module, path=None):
    """Return a dictionary with all functions and classes in module.

    Search for module in PATH + sys.path.
    If possible, include imported superclasses.
    Do this by reading source, without importing (and executing) it.

    Raise ImportError (ModuleNotFoundError) if the module cannot be
    found, or ImportError if the parent of a dotted name is not a package.
    """
    return _readmodule(module, path or [])


def _readmodule(module, path, inpackage=None):
    """Do the hard work for readmodule[_ex].

    If inpackage is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and path is combined with sys.path.

    Results are cached in the module-level _modules dict, keyed by the
    full dotted module name.  Raise ModuleNotFoundError if no spec can
    be found for the module.
    """
    # Compute the full module name (prepending inpackage if set).
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache.
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents.
    tree = {}

    # Check if it is a built-in module; we don't do much for these.
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name.
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        if '__path__' not in parent:
            raise ImportError('No package named {}'.format(package))
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module.
    if inpackage is not None:
        search_path = path
    else:
        search_path = path + sys.path
    spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    if spec is None:
        # Fail before touching the cache: otherwise a missing module would
        # leave an empty tree behind and later lookups would "succeed".
        # Raising an ImportError subclass also lets the 'import' handler in
        # _create_tree fall back to a top-level search.
        raise ModuleNotFoundError(
            'no module named {!r}'.format(fullmodule), name=fullmodule)
    # Cache before parsing so that circular imports terminate.
    _modules[fullmodule] = tree
    # Is module a package?
    if spec.submodule_search_locations is not None:
        tree['__path__'] = spec.submodule_search_locations
    try:
        source = spec.loader.get_source(fullmodule)
        if source is None:
            return tree
    except (AttributeError, ImportError):
        # If module is not Python source, we cannot do anything.
        return tree

    fname = spec.loader.get_filename(fullmodule)
    return _create_tree(fullmodule, path, fname, source, tree, inpackage)


def _create_tree(fullmodule, path, fname, source, tree, inpackage):
    """Return the tree for a particular module.

    fullmodule (full module name), inpackage+module, becomes o.module.
    path is passed to recursive calls of _readmodule.
    fname becomes o.file.
    source is tokenized.  Imports cause recursive calls to _readmodule.
    tree is {} or {'__path__': <submodule search locations>}.
    inpackage, None or string, is passed to recursive calls of _readmodule.

    The effect of recursive calls is mutation of global _modules.
    """
    f = io.StringIO(source)

    stack = []  # Initialize stack of (class, indent) pairs.

    g = tokenize.generate_tokens(f.readline)
    try:
        for tokentype, token, start, _end, _line in g:
            if tokentype == DEDENT:
                lineno, thisindent = start
                # Close previous nested classes and defs.
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
            elif token == 'def':
                lineno, thisindent = start
                # Close previous nested classes and defs.
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, func_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue  # Skip def with syntax error.
                cur_func = None
                if stack:
                    cur_obj = stack[-1][0]
                    cur_func = _nest_function(cur_obj, func_name, lineno)
                else:
                    # It is just a function.
                    cur_func = Function(fullmodule, func_name, fname, lineno)
                    tree[func_name] = cur_func
                stack.append((cur_func, thisindent))
            elif token == 'class':
                lineno, thisindent = start
                # Close previous nested classes and defs.
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, class_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue  # Skip class with syntax error.
                # Parse what follows the class name.
                tokentype, token, start = next(g)[0:3]
                inherit = None
                if token == '(':
                    names = []  # Initialize list of superclasses.
                    level = 1
                    super_tokens = []  # Tokens making up current superclass.
                    while True:
                        tokentype, token, start = next(g)[0:3]
                        if token in (')', ',') and level == 1:
                            n = "".join(super_tokens)
                            if n in tree:
                                # We know this super class.
                                n = tree[n]
                            else:
                                c = n.split('.')
                                if len(c) > 1:
                                    # Super class form is module.class:
                                    # look in module for class.
                                    m = c[-2]
                                    c = c[-1]
                                    if m in _modules:
                                        d = _modules[m]
                                        if c in d:
                                            n = d[c]
                            names.append(n)
                            super_tokens = []
                        if token == '(':
                            level += 1
                        elif token == ')':
                            level -= 1
                            if level == 0:
                                break
                        elif token == ',' and level == 1:
                            pass
                        # Only use NAME and OP (== dot) tokens for type name.
                        elif tokentype in (NAME, OP) and level == 1:
                            super_tokens.append(token)
                        # Expressions in the base list are not supported.
                    inherit = names
                if stack:
                    cur_obj = stack[-1][0]
                    cur_class = _nest_class(
                            cur_obj, class_name, lineno, inherit)
                else:
                    cur_class = Class(fullmodule, class_name, inherit,
                                      fname, lineno)
                    tree[class_name] = cur_class
                stack.append((cur_class, thisindent))
            elif token == 'import' and start[1] == 0:
                modules = _getnamelist(g)
                for mod, _mod2 in modules:
                    try:
                        # Recursively read the imported module.
                        if inpackage is None:
                            _readmodule(mod, path)
                        else:
                            try:
                                _readmodule(mod, path, inpackage)
                            except ImportError:
                                # Not a sibling in the package: retry as a
                                # top-level module.
                                _readmodule(mod, [])
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.  KeyboardInterrupt and
                        # SystemExit are deliberately not swallowed.
                        pass
            elif token == 'from' and start[1] == 0:
                mod, token = _getname(g)
                if not mod or token != "import":
                    continue
                names = _getnamelist(g)
                try:
                    # Recursively read the imported module.
                    d = _readmodule(mod, path, inpackage)
                except Exception:
                    # If we can't find or parse the imported module,
                    # too bad -- don't die here.
                    continue
                # Add any classes that were defined in the imported module
                # to our name space if they were mentioned in the list.
                for n, n2 in names:
                    if n in d:
                        tree[n2 or n] = d[n]
                    elif n == '*':
                        # Don't add names that start with _.
                        for public in d:
                            if public[0] != '_':
                                tree[public] = d[public]
    except StopIteration:
        # The token stream ended in the middle of a construct; keep
        # whatever was collected so far.
        pass

    f.close()
    return tree


def _getnamelist(g):
    """Return list of (dotted-name, as-name or None) tuples for token source g.

    An as-name is the name that follows 'as' in an as clause.
    """
    names = []
    while True:
        name, token = _getname(g)
        if not name:
            break
        if token == 'as':
            name2, token = _getname(g)
        else:
            name2 = None
        names.append((name, name2))
        # Skip ahead to the next comma or to the end of the line.
        while token != "," and "\n" not in token:
            token = next(g)[1]
        if token != ",":
            break
    return names


def _getname(g):
    "Return (dotted-name or None, next-token) tuple for token source g."
    parts = []
    tokentype, token = next(g)[0:2]
    if tokentype != NAME and token != '*':
        return (None, token)
    parts.append(token)
    while True:
        tokentype, token = next(g)[0:2]
        if token != '.':
            break
        tokentype, token = next(g)[0:2]
        if tokentype != NAME:
            break
        parts.append(token)
    return (".".join(parts), token)


def _main():
    "Print module output (default this file) for quick visual check."
    import os
    try:
        mod = sys.argv[1]
    except IndexError:
        # No argument given: describe this file.
        mod = __file__
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)
    lineno_key = lambda a: getattr(a, 'lineno', 0)
    # Reverse-sorted so that pop() yields objects in source order.
    objs = sorted(tree.values(), key=lineno_key, reverse=True)
    indent_level = 2
    while objs:
        obj = objs.pop()
        if isinstance(obj, list):
            # Value is a __path__ key.
            continue
        if not hasattr(obj, 'indent'):
            obj.indent = 0

        if isinstance(obj, _Object):
            new_objs = sorted(obj.children.values(),
                              key=lineno_key, reverse=True)
            for ob in new_objs:
                ob.indent = obj.indent + indent_level
            objs.extend(new_objs)
        if isinstance(obj, Class):
            print("{}class {} {} {}"
                  .format(' ' * obj.indent, obj.name, obj.super, obj.lineno))
        elif isinstance(obj, Function):
            print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno))


if __name__ == "__main__":
    _main()