"""Parse a Python module and describe its classes and functions.

Parse enough of a Python file to recognize imports and class and
method definitions, and to find out the superclasses of a class.

The interface consists of two functions:
        readmodule_ex(module [, path])
        readmodule(module [, path])
where module is the name of a Python module, and path is an optional
list of directories where the module is to be searched.  If present,
path is prepended to the system search path sys.path.  The return
value is a dictionary.  The keys of the dictionary are the names of
the classes and top-level functions defined in the module (including
those that are made available via the from XXX import YYY construct).
The values are instances of the classes Class and Function defined
here.  readmodule() is the backwards compatible variant that only
keeps the Class instances.  One special key/value pair is present for
packages: the key '__path__' has a list as its value which contains
the package search path.

A class is described by the class Class in this module.  Instances
of this class have the following instance variables:
        module -- the module name
        name -- the name of the class
        super -- a list of super classes (Class instances)
        methods -- a dictionary of methods
        file -- the file in which the class was defined
        lineno -- the line in the file on which the class statement occurred
The dictionary of methods uses the method names as keys and the line
numbers on which the method was defined as values.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.

A function is described by the class Function in this module.
Instances of this class have the following instance variables:
        module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line in the file on which the def statement occurred
"""

import sys
import imp
import tokenize
from token import NAME, DEDENT, OP
from operator import itemgetter

__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

_modules = {}                           # cache of modules we've seen


# each Python class is represented by an instance of this class
class Class:
    '''Class to represent a Python class.'''
    def __init__(self, module, name, super, file, lineno):
        self.module = module
        self.name = name
        if super is None:
            super = []
        self.super = super
        self.methods = {}
        self.file = file
        self.lineno = lineno

    def _addmethod(self, name, lineno):
        # Record (or overwrite) the line on which method NAME is defined.
        self.methods[name] = lineno


class Function:
    '''Class to represent a top-level Python function'''
    def __init__(self, module, name, file, lineno):
        self.module = module
        self.name = name
        self.file = file
        self.lineno = lineno


def readmodule(module, path=None):
    '''Backwards compatible interface.

    Call readmodule_ex() and then only keep Class objects from the
    resulting dictionary.'''

    res = {}
    for key, value in _readmodule(module, path or []).items():
        if isinstance(value, Class):
            res[key] = value
    return res


def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary of classes.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    or top-level function found in the module.
    '''
    return _readmodule(module, path or [])


def _unwind(stack, indent):
    # Drop every open class/def whose body started at or right of INDENT.
    while stack and stack[-1][1] >= indent:
        del stack[-1]


def _readimported(module, path, inpackage):
    # Read a module named in an import statement and return its dict.
    # Inside a package the name is first tried relative to the package
    # and then as a top-level module found on sys.path.
    if inpackage is None:
        return _readmodule(module, path)
    try:
        return _readmodule(module, path, inpackage)
    except ImportError:
        return _readmodule(module, [])


def _resolve_base(name, tree):
    # Map a superclass NAME to a known Class instance, or return the
    # name itself when the class is not known.
    if name in tree:
        # we know this super class
        return tree[name]
    parts = name.split('.')
    if len(parts) > 1:
        # super class is of the form module.class: look in module for
        # class.  The cache is keyed by full dotted module name, so try
        # that first and then the innermost module name alone.
        cls_name = parts[-1]
        for mod_name in (".".join(parts[:-1]), parts[-2]):
            if mod_name in _modules:
                found = _modules[mod_name]
                if cls_name in found:
                    return found[cls_name]
    return name


def _getbases(g, tree):
    # Helper to parse the superclass list of a class statement; the
    # opening parenthesis has already been consumed.  Return a list
    # whose items are Class instances or, for unknown classes, names.
    bases = []
    level = 1
    parts = []          # Tokens making up current superclass
    while True:
        tokentype, token = next(g)[0:2]
        if token in (')', ',') and level == 1:
            name = "".join(parts)
            # "class C():" and a trailing comma produce an empty name,
            # which is not a superclass.
            if name:
                bases.append(_resolve_base(name, tree))
            parts = []
        if token == '(':
            level += 1
        elif token == ')':
            level -= 1
            if level == 0:
                break
        elif token == ',' and level == 1:
            pass
        # only use NAME and OP (== dot) tokens for type name
        elif tokentype in (NAME, OP) and level == 1:
            parts.append(token)
        # expressions in the base list are not supported
    return bases


def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    If INPACKAGE is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and PATH is combined with sys.path.

    Return the dictionary describing the module.  Raise ImportError
    (from imp.find_module) if the module cannot be found, and KeyError
    if the parent of a dotted name is not a package.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents
    tree = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage is not None:
        search_path = path
    else:
        search_path = path + sys.path
    f, fname, (_s, _m, ty) = imp.find_module(module, search_path)
    if ty == imp.PKG_DIRECTORY:
        tree['__path__'] = [fname]
        path = [fname] + path
        f, fname, (_s, _m, ty) = imp.find_module('__init__', [fname])
    # Cache before parsing so that circular imports terminate.
    _modules[fullmodule] = tree
    if ty != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:       # no file object for e.g. built-in modules
            f.close()
        return tree

    stack = []  # stack of (class, indent) pairs

    g = tokenize.generate_tokens(f.readline)
    try:
        for tokentype, token, start, _end, _line in g:
            if tokentype == DEDENT:
                lineno, thisindent = start
                # close nested classes and defs
                _unwind(stack, thisindent)
            elif token == 'def':
                lineno, thisindent = start
                # close previous nested classes and defs
                _unwind(stack, thisindent)
                tokentype, meth_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue    # Syntax error
                if stack:
                    cur_class = stack[-1][0]
                    if isinstance(cur_class, Class):
                        # it's a method
                        cur_class._addmethod(meth_name, lineno)
                    # else it's a nested def
                else:
                    # it's a function
                    tree[meth_name] = Function(fullmodule, meth_name,
                                               fname, lineno)
                stack.append((None, thisindent))        # Marker for nested fns
            elif token == 'class':
                lineno, thisindent = start
                # close previous nested classes and defs
                _unwind(stack, thisindent)
                tokentype, class_name, start = next(g)[0:3]
                if tokentype != NAME:
                    continue    # Syntax error
                # parse what follows the class name
                tokentype, token, start = next(g)[0:3]
                inherit = None
                if token == '(':
                    # there's a list of superclasses
                    inherit = _getbases(g, tree)
                cur_class = Class(fullmodule, class_name, inherit,
                                  fname, lineno)
                # only top-level classes are entered in the result
                if not stack:
                    tree[class_name] = cur_class
                stack.append((cur_class, thisindent))
            elif token == 'import' and start[1] == 0:
                for mod, _mod2 in _getnamelist(g):
                    try:
                        # Recursively read the imported module
                        _readimported(mod, path, inpackage)
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.
                        pass
            elif token == 'from' and start[1] == 0:
                mod, token = _getname(g)
                if not mod or token != "import":
                    continue
                names = _getnamelist(g)
                try:
                    # Recursively read the imported module
                    d = _readimported(mod, path, inpackage)
                except Exception:
                    # If we can't find or parse the imported module,
                    # too bad -- don't die here.
                    continue
                # add any classes that were defined in the imported module
                # to our name space if they were mentioned in the list
                for n, n2 in names:
                    if n in d:
                        tree[n2 or n] = d[n]
                    elif n == '*':
                        # don't add names that start with _
                        for n in d:
                            if n[0] != '_':
                                tree[n] = d[n]
    except StopIteration:
        # A helper ran off the end of the token stream; we are done.
        pass
    finally:
        # Close the source file even when tokenizing fails.
        f.close()

    return tree


def _getnamelist(g):
    # Helper to get a comma-separated list of dotted names plus 'as'
    # clauses.  Return a list of pairs (name, name2) where name2 is
    # the 'as' name, or None if there is no 'as' clause.
    names = []
    while True:
        name, token = _getname(g)
        if not name:
            break
        if token == 'as':
            name2, token = _getname(g)
        else:
            name2 = None
        names.append((name, name2))
        # skip ahead to the next comma or the end of the line
        while token != "," and "\n" not in token:
            token = next(g)[1]
        if token != ",":
            break
    return names


def _getname(g):
    # Helper to get a dotted name, return a pair (name, token) where
    # name is the dotted name, or None if there was no dotted name,
    # and token is the next input token.
    parts = []
    tokentype, token = next(g)[0:2]
    if tokentype != NAME and token != '*':
        return (None, token)
    parts.append(token)
    while True:
        tokentype, token = next(g)[0:2]
        if token != '.':
            break
        tokentype, token = next(g)[0:2]
        if tokentype != NAME:
            break
        parts.append(token)
    return (".".join(parts), token)


def _main():
    # Main program for testing.
    import os
    mod = sys.argv[1]
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)
    # the '__path__' entry has no lineno and sorts first
    objs = sorted(tree.values(), key=lambda obj: getattr(obj, 'lineno', 0))
    for obj in objs:
        if isinstance(obj, Class):
            print("class %s %s %s" % (obj.name, obj.super, obj.lineno))
            methods = sorted(obj.methods.items(), key=itemgetter(1))
            for name, lineno in methods:
                if name != "__path__":
                    print(" def %s %s" % (name, lineno))
        elif isinstance(obj, Function):
            print("def %s %s" % (obj.name, obj.lineno))


if __name__ == "__main__":
    _main()