• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Parse a Python module and describe its classes and functions.
2
3Parse enough of a Python file to recognize imports and class and
4function definitions, and to find out the superclasses of a class.
5
The main interface is the function below (readmodule is the older
variant that returns only the top-level classes):
    readmodule_ex(module, path=None)
8where module is the name of a Python module, and path is an optional
9list of directories where the module is to be searched.  If present,
10path is prepended to the system search path sys.path.  The return value
11is a dictionary.  The keys of the dictionary are the names of the
12classes and functions defined in the module (including classes that are
13defined via the from XXX import YYY construct).  The values are
14instances of classes Class and Function.  One special key/value pair is
15present for packages: the key '__path__' has a list as its value which
16contains the package search path.
17
18Classes and Functions have a common superclass: _Object.  Every instance
19has the following attributes:
20    module  -- name of the module;
21    name    -- name of the object;
22    file    -- file in which the object is defined;
23    lineno  -- line in the file where the object's definition starts;
24    end_lineno -- line in the file where the object's definition ends;
25    parent  -- parent of this object, if any;
26    children -- nested objects contained in this object.
27The 'children' attribute is a dictionary mapping names to objects.
28
29Instances of Function describe functions with the attributes from _Object,
30plus the following:
31    is_async -- if a function is defined with an 'async' prefix
32
33Instances of Class describe classes with the attributes from _Object,
34plus the following:
35    super   -- list of super classes (Class instances if possible);
36    methods -- mapping of method names to beginning line numbers.
37If the name of a super class is not recognized, the corresponding
38entry in the list of super classes is not a class instance but a
39string giving the name of the super class.  Since import statements
40are recognized and imported modules are scanned as well, this
41shouldn't happen often.
42"""
43
44import ast
45import sys
46import importlib.util
47
# Names exported by "from pyclbr import *".
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

# Maps a full dotted module name to the dictionary describing that
# module; _readmodule() consults it before scanning a module again.
_modules = {}  # Initialize cache of modules we've seen.
51
52
53class _Object:
54    "Information about Python class or function."
55    def __init__(self, module, name, file, lineno, end_lineno, parent):
56        self.module = module
57        self.name = name
58        self.file = file
59        self.lineno = lineno
60        self.end_lineno = end_lineno
61        self.parent = parent
62        self.children = {}
63        if parent is not None:
64            parent.children[name] = self
65
66
67# Odd Function and Class signatures are for back-compatibility.
class Function(_Object):
    """Describe a Python function or method.

    Adds the is_async flag to the attributes of _Object.  When the
    parent is a Class, the function is also entered in that class's
    methods mapping under its name, with its starting line as value.
    """

    def __init__(self, module, name, file, lineno,
                 parent=None, is_async=False, *, end_lineno=None):
        super().__init__(module, name, file, lineno, end_lineno, parent)
        self.is_async = is_async
        if not isinstance(parent, Class):
            return
        # A function directly inside a class is one of its methods.
        parent.methods[name] = lineno
76
77
class Class(_Object):
    """Describe a Python class.

    Adds to the attributes of _Object:
        super   -- the list of base classes passed in as super_
                   (an empty list when super_ is falsy);
        methods -- mapping of method names to line numbers, filled in
                   as Function objects are created with this parent.
    """

    def __init__(self, module, name, super_, file, lineno,
                 parent=None, *, end_lineno=None):
        super().__init__(module, name, file, lineno, end_lineno, parent)
        self.super = super_ if super_ else []
        self.methods = {}
85
86
87# These 2 functions are used in these tests
88# Lib/test/test_pyclbr, Lib/idlelib/idle_test/test_browser.py
def _nest_function(ob, func_name, lineno, end_lineno, is_async=False):
    """Create a Function called func_name nested in ob and return it.

    The new object takes its module and file from ob.
    """
    nested = Function(ob.module, func_name, ob.file, lineno,
                      parent=ob, is_async=is_async, end_lineno=end_lineno)
    return nested
93
def _nest_class(ob, class_name, lineno, end_lineno, super=None):
    """Create a Class called class_name nested in ob and return it.

    The new object takes its module and file from ob; super is passed
    on as the list of base classes.
    """
    nested = Class(ob.module, class_name, super, ob.file, lineno,
                   parent=ob, end_lineno=end_lineno)
    return nested
98
99
def readmodule(module, path=None):
    """Return Class objects for the top-level classes in module.

    This is the original interface, before Functions were added:
    entries of the module dictionary that are not Class instances
    are left out of the result.
    """
    contents = _readmodule(module, path or [])
    return {name: obj for name, obj in contents.items()
            if isinstance(obj, Class)}
111
def readmodule_ex(module, path=None):
    """Return a dictionary with all functions and classes in module.

    The module is looked up in PATH + sys.path; a missing or empty
    path means sys.path alone.  Imported superclasses are included
    where possible.  The source is read, never imported or executed.
    """
    extra_dirs = path if path else []
    return _readmodule(module, extra_dirs)
120
121
def _readmodule(module, path, inpackage=None):
    """Do the hard work for readmodule[_ex].

    If inpackage is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and path is combined with sys.path.

    Return the dictionary describing the module.  The dictionary is
    stored in the _modules cache under the full dotted module name and
    is returned from there on later calls.

    Raise ImportError when the parent of a dotted name is not a package
    and ModuleNotFoundError when no spec can be found for the module.
    """
    # Compute the full module name (prepending inpackage if set).
    if inpackage is not None:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache.
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents.
    tree = {}

    # Check if it is a built-in module; we don't do much for these.
    if module in sys.builtin_module_names and inpackage is None:
        _modules[module] = tree
        return tree

    # Check for a dotted module name.
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        # Scan the parent package first; its '__path__' entry is the
        # search path for the submodule.
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = "%s.%s" % (inpackage, package)
        if '__path__' not in parent:
            raise ImportError('No package named {}'.format(package))
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module.
    if inpackage is not None:
        search_path = path
    else:
        search_path = path + sys.path
    spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    if spec is None:
        raise ModuleNotFoundError(f"no module named {fullmodule!r}", name=fullmodule)
    # Cache the (still empty) dict before parsing, so a lookup of this
    # module made while its source is being scanned hits the cache.
    _modules[fullmodule] = tree
    # Is module a package?
    if spec.submodule_search_locations is not None:
        tree['__path__'] = spec.submodule_search_locations
    try:
        source = spec.loader.get_source(fullmodule)
    except (AttributeError, ImportError):
        # If module is not Python source, we cannot do anything.
        return tree
    else:
        if source is None:
            return tree

    fname = spec.loader.get_filename(fullmodule)
    return _create_tree(fullmodule, path, fname, source, tree, inpackage)
184
185
class _ModuleBrowser(ast.NodeVisitor):
    """AST visitor that records the classes and functions of one module.

    Top-level definitions, and names brought in by "from ... import"
    statements that start in column 0, are stored in the tree
    dictionary.  Nested definitions are attached to their enclosing
    object through the parent argument of Class and Function.
    """

    def __init__(self, module, path, file, tree, inpackage):
        self.path = path
        self.tree = tree
        self.file = file
        self.module = module
        self.inpackage = inpackage
        # Enclosing Class/Function objects, innermost last.
        self.stack = []

    def visit_ClassDef(self, node):
        """Record a class definition, then visit its body."""
        bases = []
        for base in node.bases:
            name = ast.unparse(base)
            if name in self.tree:
                # We know this super class.
                bases.append(self.tree[name])
            elif len(names := name.split(".")) > 1:
                # Super class form is module.class:
                # look in module for class.
                *_, module, class_ = names
                # Fall back to the dotted name when the module has not
                # been scanned or does not define the class, so that an
                # unrecognized base is kept as a string, not dropped.
                bases.append(_modules.get(module, {}).get(class_, name))
            else:
                bases.append(name)

        parent = self.stack[-1] if self.stack else None
        class_ = Class(self.module, node.name, bases, self.file, node.lineno,
                       parent=parent, end_lineno=node.end_lineno)
        if parent is None:
            self.tree[node.name] = class_
        # Definitions found while visiting the body nest under class_.
        self.stack.append(class_)
        self.generic_visit(node)
        self.stack.pop()

    def visit_FunctionDef(self, node, *, is_async=False):
        """Record a function definition, then visit its body."""
        parent = self.stack[-1] if self.stack else None
        function = Function(self.module, node.name, self.file, node.lineno,
                            parent, is_async, end_lineno=node.end_lineno)
        if parent is None:
            self.tree[node.name] = function
        # Definitions found while visiting the body nest under function.
        self.stack.append(function)
        self.generic_visit(node)
        self.stack.pop()

    def visit_AsyncFunctionDef(self, node):
        """Record an 'async def' as a Function with is_async set."""
        self.visit_FunctionDef(node, is_async=True)

    def visit_Import(self, node):
        """Scan the modules named by an import starting in column 0.

        The results are not stored in the tree; scanning only fills the
        _modules cache used to resolve module.class base names.
        """
        if node.col_offset != 0:
            return

        for module in node.names:
            try:
                try:
                    _readmodule(module.name, self.path, self.inpackage)
                except ImportError:
                    # Retry as a top-level module.
                    _readmodule(module.name, [])
            except (ImportError, SyntaxError):
                # If we can't find or parse the imported module,
                # too bad -- don't die here.
                continue

    def visit_ImportFrom(self, node):
        """Copy names from a 'from ... import' starting in column 0.

        Imported names found in the scanned module are added to the
        tree (under their 'as' name if given).  A star import copies
        every name that does not start with an underscore.
        """
        if node.col_offset != 0:
            return
        try:
            # Leading dots carry the relative-import level.
            module = "." * node.level
            if node.module:
                module += node.module
            module = _readmodule(module, self.path, self.inpackage)
        except (ImportError, SyntaxError):
            return

        for name in node.names:
            if name.name in module:
                self.tree[name.asname or name.name] = module[name.name]
            elif name.name == "*":
                for import_name, import_value in module.items():
                    if import_name.startswith("_"):
                        continue
                    self.tree[import_name] = import_value
267
268
def _create_tree(fullmodule, path, fname, source, tree, inpackage):
    """Parse source and record its definitions in tree.

    A _ModuleBrowser built from the arguments visits the parsed
    source; the browser's tree is returned.
    """
    browser = _ModuleBrowser(fullmodule, path, fname, tree, inpackage)
    parsed = ast.parse(source)
    browser.visit(parsed)
    return browser.tree
273
274
def _main():
    """Print module output (default this file) for quick visual check.

    The module is taken from the first command-line argument.  If that
    argument is an existing path, its directory is used as the search
    path and its base name (minus a '.py' suffix) as the module name.
    Each class and function is printed with its line number, nested
    definitions indented below their parent.
    """
    import os
    try:
        mod = sys.argv[1]
    except IndexError:
        # No argument given: describe this file.
        mod = __file__
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)
    # Values without a lineno (the '__path__' entry) sort as line 0.
    lineno_key = lambda a: getattr(a, 'lineno', 0)
    # Sorted in reverse so that pop() yields objects in line order.
    objs = sorted(tree.values(), key=lineno_key, reverse=True)
    indent_level = 2
    while objs:
        obj = objs.pop()
        if not isinstance(obj, _Object):
            # Value is a __path__ key (a list or another path object);
            # there is nothing to print for it.
            continue
        if not hasattr(obj, 'indent'):
            obj.indent = 0

        # Queue the children so they are printed right after obj,
        # one level deeper.
        new_objs = sorted(obj.children.values(),
                          key=lineno_key, reverse=True)
        for ob in new_objs:
            ob.indent = obj.indent + indent_level
        objs.extend(new_objs)
        if isinstance(obj, Class):
            print("{}class {} {} {}"
                  .format(' ' * obj.indent, obj.name, obj.super, obj.lineno))
        elif isinstance(obj, Function):
            print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno))
312
# Print the quick visual check only when run as a script.
if __name__ == "__main__":
    _main()
315