"""Parse a Python module and describe its classes and functions.

Parse enough of a Python file to recognize imports and class and
function definitions, and to find out the superclasses of a class.

The interface consists of a single function:
    readmodule_ex(module, path=None)
where module is the name of a Python module, and path is an optional
list of directories where the module is to be searched.  If present,
path is prepended to the system search path sys.path.  The return value
is a dictionary.  The keys of the dictionary are the names of the
classes and functions defined in the module (including classes that are
defined via the from XXX import YYY construct).  The values are
instances of classes Class and Function.  One special key/value pair is
present for packages: the key '__path__' has a list as its value which
contains the package search path.

Classes and Functions have a common superclass: _Object.  Every instance
has the following attributes:
    module  -- name of the module;
    name    -- name of the object;
    file    -- file in which the object is defined;
    lineno  -- line in the file where the object's definition starts;
    end_lineno -- line in the file where the object's definition ends;
    parent  -- parent of this object, if any;
    children -- nested objects contained in this object.
The 'children' attribute is a dictionary mapping names to objects.

Instances of Function describe functions with the attributes from _Object,
plus the following:
    is_async -- if a function is defined with an 'async' prefix

Instances of Class describe classes with the attributes from _Object,
plus the following:
    super   -- list of super classes (Class instances if possible);
    methods -- mapping of method names to beginning line numbers.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.
"""

import ast
import sys
import importlib.util

__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

_modules = {}  # Initialize cache of modules we've seen.


class _Object:
    "Information about Python class or function."
    def __init__(self, module, name, file, lineno, end_lineno, parent):
        self.module = module
        self.name = name
        self.file = file
        self.lineno = lineno
        self.end_lineno = end_lineno
        self.parent = parent
        self.children = {}
        # Register with the parent so the tree can be walked top-down.
        if parent is not None:
            parent.children[name] = self


# Odd Function and Class signatures are for back-compatibility.
class Function(_Object):
    "Information about a Python function, including methods."
    def __init__(self, module, name, file, lineno,
                 parent=None, is_async=False, *, end_lineno=None):
        super().__init__(module, name, file, lineno, end_lineno, parent)
        self.is_async = is_async
        # A function nested directly in a class is recorded as a method.
        if isinstance(parent, Class):
            parent.methods[name] = lineno


class Class(_Object):
    "Information about a Python class."
    def __init__(self, module, name, super_, file, lineno,
                 parent=None, *, end_lineno=None):
        super().__init__(module, name, file, lineno, end_lineno, parent)
        # A falsy super_ (None or empty) is normalized to a new empty list.
        self.super = super_ or []
        self.methods = {}


# These 2 functions are used in these tests
# Lib/test/test_pyclbr, Lib/idlelib/idle_test/test_browser.py
def _nest_function(ob, func_name, lineno, end_lineno, is_async=False):
    "Return a Function after nesting within ob."
    return Function(ob.module, func_name, ob.file, lineno,
                    parent=ob, is_async=is_async, end_lineno=end_lineno)


def _nest_class(ob, class_name, lineno, end_lineno, super=None):
    "Return a Class after nesting within ob."
    return Class(ob.module, class_name, super, ob.file, lineno,
                 parent=ob, end_lineno=end_lineno)


def readmodule(module, path=None):
    """Return Class objects for the top-level classes in module.

    This is the original interface, before Functions were added.
    """
    return {key: value
            for key, value in _readmodule(module, path or []).items()
            if isinstance(value, Class)}


def readmodule_ex(module, path=None):
    """Return a dictionary with all functions and classes in module.

    Search for module in PATH + sys.path.
    If possible, include imported superclasses.
    Do this by reading source, without importing (and executing) it.
    """
    return _readmodule(module, path or [])


def _readmodule(module, path, inpackage=None):
    """Do the hard work for readmodule[_ex].

    If inpackage is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and path is combined with sys.path.

    Return the (possibly cached) dictionary describing the module.
    Raise ImportError if a dotted name's parent is not a package, and
    ModuleNotFoundError if no spec can be found for the module.
    """
    # Compute the full module name (prepending inpackage if set).
    if inpackage is not None:
        fullmodule = f"{inpackage}.{module}"
    else:
        fullmodule = module

    # Check in the cache.
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents.
    tree = {}

    # Check if it is a built-in module; we don't do much for these.
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name.
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = f"{inpackage}.{package}"
        if '__path__' not in parent:
            raise ImportError(f'No package named {package}')
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module.
    if inpackage is not None:
        search_path = path
    else:
        # list() lets callers pass any sequence (e.g. a tuple), not
        # only a list, as the extra search path.
        search_path = list(path) + sys.path
    spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    if spec is None:
        raise ModuleNotFoundError(f"no module named {fullmodule!r}",
                                  name=fullmodule)
    # Cache before parsing so that recursive reads triggered by this
    # module's own imports find the entry instead of starting over.
    _modules[fullmodule] = tree
    # Is module a package?
    if spec.submodule_search_locations is not None:
        tree['__path__'] = spec.submodule_search_locations
    try:
        source = spec.loader.get_source(fullmodule)
    except (AttributeError, ImportError):
        # If module is not Python source, we cannot do anything.
        return tree
    else:
        if source is None:
            return tree

    fname = spec.loader.get_filename(fullmodule)
    return _create_tree(fullmodule, path, fname, source, tree, inpackage)


class _ModuleBrowser(ast.NodeVisitor):
    """AST visitor that fills 'tree' with the classes and functions found.

    'stack' holds the chain of enclosing Class/Function objects for the
    node currently being visited; it is empty at module level.
    """

    def __init__(self, module, path, file, tree, inpackage):
        self.path = path
        self.tree = tree
        self.file = file
        self.module = module
        self.inpackage = inpackage
        self.stack = []

    def visit_ClassDef(self, node):
        "Record a class, resolving its bases to Class objects if known."
        bases = []
        for base in node.bases:
            name = ast.unparse(base)
            if name in self.tree:
                # We know this super class.
                bases.append(self.tree[name])
            elif len(names := name.split(".")) > 1:
                # Super class form is module.class:
                # look in module for class.  Only the last module
                # component is used for the cache lookup.
                *_, module, class_ = names
                if module in _modules:
                    bases.append(_modules[module].get(class_, name))
            else:
                bases.append(name)

        parent = self.stack[-1] if self.stack else None
        class_ = Class(self.module, node.name, bases, self.file, node.lineno,
                       parent=parent, end_lineno=node.end_lineno)
        if parent is None:
            self.tree[node.name] = class_
        self.stack.append(class_)
        self.generic_visit(node)
        self.stack.pop()

    def visit_FunctionDef(self, node, *, is_async=False):
        "Record a function or method and descend into its body."
        parent = self.stack[-1] if self.stack else None
        function = Function(self.module, node.name, self.file, node.lineno,
                            parent, is_async, end_lineno=node.end_lineno)
        if parent is None:
            self.tree[node.name] = function
        self.stack.append(function)
        self.generic_visit(node)
        self.stack.pop()

    def visit_AsyncFunctionDef(self, node):
        "Record an 'async def' the same way as a plain function."
        self.visit_FunctionDef(node, is_async=True)

    def visit_Import(self, node):
        "Read the modules named by an 'import' statement at column 0."
        if node.col_offset != 0:
            return

        for module in node.names:
            try:
                try:
                    _readmodule(module.name, self.path, self.inpackage)
                except ImportError:
                    # Retry as a top-level module on the default path.
                    _readmodule(module.name, [])
            except (ImportError, SyntaxError):
                # If we can't find or parse the imported module,
                # too bad -- don't die here.
                continue

    def visit_ImportFrom(self, node):
        "Copy names brought in by a 'from ... import' at column 0."
        if node.col_offset != 0:
            return
        try:
            # Leading dots encode the relative-import level.
            module = "." * node.level
            if node.module:
                module += node.module
            module = _readmodule(module, self.path, self.inpackage)
        except (ImportError, SyntaxError):
            return

        for name in node.names:
            if name.name in module:
                self.tree[name.asname or name.name] = module[name.name]
            elif name.name == "*":
                for import_name, import_value in module.items():
                    if import_name.startswith("_"):
                        continue
                    self.tree[import_name] = import_value


def _create_tree(fullmodule, path, fname, source, tree, inpackage):
    "Parse source and return tree filled in with what it defines."
    mbrowser = _ModuleBrowser(fullmodule, path, fname, tree, inpackage)
    mbrowser.visit(ast.parse(source))
    return mbrowser.tree


def _main():
    "Print module output (default this file) for quick visual check."
    import os
    try:
        mod = sys.argv[1]
    except IndexError:
        # No argument given: describe this file.
        mod = __file__
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)

    def lineno_key(a):
        # Values without a line number (the '__path__' entry) sort as 0.
        return getattr(a, 'lineno', 0)

    # Sorted in reverse so that pop() yields objects in source order.
    objs = sorted(tree.values(), key=lineno_key, reverse=True)
    indent_level = 2
    while objs:
        obj = objs.pop()
        if not isinstance(obj, _Object):
            # Value is a __path__ key.
            continue
        if not hasattr(obj, 'indent'):
            obj.indent = 0

        new_objs = sorted(obj.children.values(),
                          key=lineno_key, reverse=True)
        for ob in new_objs:
            ob.indent = obj.indent + indent_level
        objs.extend(new_objs)
        if isinstance(obj, Class):
            print("{}class {} {} {}"
                  .format(' ' * obj.indent, obj.name, obj.super, obj.lineno))
        elif isinstance(obj, Function):
            print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno))


if __name__ == "__main__":
    _main()