• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Disassembler of Python byte code into mnemonics."""
2
3import sys
4import types
5import collections
6import io
7
8from opcode import *
9from opcode import __all__ as _opcodes_all
10
11__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
12           "findlinestarts", "findlabels", "show_code",
13           "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
14del _opcodes_all
15
16_have_code = (types.MethodType, types.FunctionType, types.CodeType,
17              classmethod, staticmethod, type)
18
19FORMAT_VALUE = opmap['FORMAT_VALUE']
20FORMAT_VALUE_CONVERTERS = (
21    (None, ''),
22    (str, 'str'),
23    (repr, 'repr'),
24    (ascii, 'ascii'),
25)
26MAKE_FUNCTION = opmap['MAKE_FUNCTION']
27MAKE_FUNCTION_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure')
28
29
def _try_compile(source, name):
    """Compile *source*, trying expression mode before statement mode.

       The source is first compiled in 'eval' mode; if that raises
       SyntaxError it is compiled again in 'exec' mode.  *name* is passed
       to compile() as the filename.

       This lets functions that normally expect code objects accept
       plain strings as well.
    """
    try:
        return compile(source, name, 'eval')
    except SyntaxError:
        # Not a single expression: fall back to compiling as statements.
        return compile(source, name, 'exec')
42
def dis(x=None, *, file=None, depth=None):
    """Disassemble classes, methods, functions, and other compiled objects.

    With no argument, disassemble the last traceback.

    Compiled objects currently include generator objects, async generator
    objects, and coroutine objects, all of which store their code object
    in a special attribute.

    Raw bytecode (bytes or bytearray) and source strings are accepted too.
    Output is printed to *file*; *depth* is forwarded to the recursive
    disassembly of code objects and source strings.  TypeError is raised
    for objects that match none of the supported kinds.
    """
    if x is None:
        distb(file=file)
        return
    # A method wraps the function that owns the code.
    if hasattr(x, '__func__'):
        x = x.__func__
    # Functions, generators, asynchronous generators and coroutines keep
    # their code object under different attributes; the first match wins.
    for code_attr in ('__code__', 'gi_code', 'ag_code', 'cr_code'):
        if hasattr(x, code_attr):
            x = getattr(x, code_attr)
            break
    # Perform the disassembly.
    if hasattr(x, '__dict__'):  # Class or module
        for name, member in sorted(x.__dict__.items()):
            if not isinstance(member, _have_code):
                continue
            print("Disassembly of %s:" % name, file=file)
            try:
                dis(member, file=file, depth=depth)
            except TypeError as msg:
                print("Sorry:", msg, file=file)
            print(file=file)
        return
    if hasattr(x, 'co_code'):  # Code object
        _disassemble_recursive(x, file=file, depth=depth)
        return
    if isinstance(x, (bytes, bytearray)):  # Raw bytecode
        _disassemble_bytes(x, file=file)
        return
    if isinstance(x, str):  # Source code
        _disassemble_str(x, file=file, depth=depth)
        return
    raise TypeError("don't know how to disassemble %s objects" %
                    type(x).__name__)
87
def distb(tb=None, *, file=None):
    """Disassemble a traceback (default: last traceback).

    When *tb* is omitted, sys.last_traceback is used and followed to its
    innermost entry; RuntimeError is raised if no such traceback exists.
    A traceback passed explicitly is disassembled as given.
    """
    if tb is None:
        try:
            tb = sys.last_traceback
        except AttributeError:
            raise RuntimeError("no last traceback to disassemble") from None
        # Walk down to the last entry in the traceback chain.
        while tb.tb_next:
            tb = tb.tb_next
    code = tb.tb_frame.f_code
    disassemble(code, tb.tb_lasti, file=file)
97
98# The inspect module interrogates this dictionary to build its
99# list of CO_* constants. It is also used by pretty_flags to
100# turn the co_flags field into a human readable list.
COMPILER_FLAG_NAMES = {
    1: "OPTIMIZED",
    2: "NEWLOCALS",
    4: "VARARGS",
    8: "VARKEYWORDS",
    16: "NESTED",
    32: "GENERATOR",
    64: "NOFREE",
    128: "COROUTINE",
    256: "ITERABLE_COROUTINE",
    512: "ASYNC_GENERATOR",
}

def pretty_flags(flags):
    """Return pretty representation of code flags.

    The low 32 bits are examined from least to most significant.  Each set
    bit contributes its name from COMPILER_FLAG_NAMES, or its hex value
    when it has no name.  If the scan finishes without the remaining value
    reaching zero (which includes an input of 0), the hex of whatever
    remains is appended.  The pieces are joined with ", ".
    """
    names = []
    remaining = flags
    for bit in range(32):
        mask = 1 << bit
        if not remaining & mask:
            continue
        names.append(COMPILER_FLAG_NAMES.get(mask, hex(mask)))
        remaining ^= mask
        if not remaining:
            # Every set bit has been reported.
            break
    else:
        # No break: either nothing was set or bits above bit 31 remain.
        names.append(hex(remaining))
    return ", ".join(names)
127
def _get_code_object(x):
    """Helper to handle methods, compiled or raw code objects, and strings.

    Returns the code object found for *x*; raises TypeError when the final
    object has no co_code attribute.
    """
    # A method wraps the function that owns the code.
    if hasattr(x, '__func__'):
        x = x.__func__
    # Functions, generators, asynchronous generators and coroutines keep
    # their code object under different attributes; the first match wins.
    for code_attr in ('__code__', 'gi_code', 'ag_code', 'cr_code'):
        if hasattr(x, code_attr):
            x = getattr(x, code_attr)
            break
    # Source code is compiled on the fly.
    if isinstance(x, str):
        x = _try_compile(x, "<disassembly>")
    # Anything that still lacks co_code cannot be disassembled.
    if not hasattr(x, 'co_code'):
        raise TypeError("don't know how to disassemble %s objects" %
                        type(x).__name__)
    return x
150
def code_info(x):
    """Formatted details of methods, functions, or code.

    *x* is resolved to a code object first; the return value is the
    multi-line string built from that code object.
    """
    code = _get_code_object(x)
    return _format_code_info(code)
154
def _format_code_info(co):
    """Return a multi-line, human readable summary of the code object *co*.

    The fixed header lines come first.  They are followed by one section
    each for constants, names, variable names, free variables and cell
    variables; a section is omitted when its sequence is empty.
    """
    lines = [
        "Name:              %s" % co.co_name,
        "Filename:          %s" % co.co_filename,
        "Argument count:    %s" % co.co_argcount,
        "Positional-only arguments: %s" % co.co_posonlyargcount,
        "Kw-only arguments: %s" % co.co_kwonlyargcount,
        "Number of locals:  %s" % co.co_nlocals,
        "Stack size:        %s" % co.co_stacksize,
        "Flags:             %s" % pretty_flags(co.co_flags),
    ]
    # (heading, attribute holding the entries, per-entry format).  Constants
    # are shown with repr(); the name tables are shown as plain text.
    sections = (
        ("Constants:", "co_consts", "%4d: %r"),
        ("Names:", "co_names", "%4d: %s"),
        ("Variable names:", "co_varnames", "%4d: %s"),
        ("Free variables:", "co_freevars", "%4d: %s"),
        ("Cell variables:", "co_cellvars", "%4d: %s"),
    )
    for heading, attr, entry_fmt in sections:
        entries = getattr(co, attr)
        if entries:
            lines.append(heading)
            lines.extend(entry_fmt % pair for pair in enumerate(entries))
    return "\n".join(lines)
186
def show_code(co, *, file=None):
    """Print details of methods, functions, or code to *file*.

    If *file* is not provided, the output is printed on stdout.
    """
    summary = code_info(co)
    print(summary, file=file)
193
_Instruction = collections.namedtuple(
    "_Instruction",
    [
        "opname",
        "opcode",
        "arg",
        "argval",
        "argrepr",
        "offset",
        "starts_line",
        "is_jump_target",
    ],
)

_Instruction.opname.__doc__ = "Human readable name for operation"
_Instruction.opcode.__doc__ = "Numeric code for operation"
_Instruction.arg.__doc__ = "Numeric argument to operation (if any), otherwise None"
_Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
_Instruction.argrepr.__doc__ = "Human readable description of operation argument"
_Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
_Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None"
_Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False"

# Column widths used when formatting the opcode name and its argument.
_OPNAME_WIDTH = 20
_OPARG_WIDTH = 5

class Instruction(_Instruction):
    """Details for a bytecode operation

       Defined fields:
         opname - human readable name for operation
         opcode - numeric code for operation
         arg - numeric argument to operation (if any), otherwise None
         argval - resolved arg value (if known), otherwise same as arg
         argrepr - human readable description of operation argument
         offset - start index of operation within bytecode sequence
         starts_line - line started by this opcode (if any), otherwise None
         is_jump_target - True if other code jumps to here, otherwise False
    """

    def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
        """Format instruction details for inclusion in disassembly output

        *lineno_width* sets the width of the line number field (0 omits it)
        *mark_as_current* inserts a '-->' marker arrow as part of the line
        *offset_width* sets the width of the instruction offset field

        Returns the columns joined by single spaces, with trailing
        whitespace removed.
        """
        columns = []
        # Source line number; blank when this opcode does not start a line.
        if lineno_width:
            if self.starts_line is None:
                columns.append(' ' * lineno_width)
            else:
                columns.append("%*d" % (lineno_width, self.starts_line))
        # Current instruction indicator.
        columns.append('-->' if mark_as_current else '   ')
        # Jump target marker.
        columns.append('>>' if self.is_jump_target else '  ')
        # Instruction offset from the start of the code sequence.
        columns.append(repr(self.offset).rjust(offset_width))
        # Opcode name.
        columns.append(self.opname.ljust(_OPNAME_WIDTH))
        # Raw argument, then its description when one is available.
        if self.arg is not None:
            columns.append(repr(self.arg).rjust(_OPARG_WIDTH))
            if self.argrepr:
                columns.append('(' + self.argrepr + ')')
        return ' '.join(columns).rstrip()
259
260
def get_instructions(x, *, first_line=None):
    """Iterator for the opcodes in methods, functions or code

    Generates a series of Instruction named tuples giving the details of
    each operation in the supplied code.

    If *first_line* is not None, it indicates the line number that should
    be reported for the first source line in the disassembled code.
    Otherwise, the source line information (if any) is taken directly from
    the disassembled code object.
    """
    co = _get_code_object(x)
    # Shift every reported line so the code's first line becomes first_line.
    line_offset = 0 if first_line is None else first_line - co.co_firstlineno
    return _get_instructions_bytes(
        co.co_code,
        co.co_varnames,
        co.co_names,
        co.co_consts,
        co.co_cellvars + co.co_freevars,
        dict(findlinestarts(co)),
        line_offset,
    )
282
def _get_const_info(const_index, const_list):
    """Helper to get optional details about const references

       Returns a (value, repr(value)) pair.  The value is the entry of
       *const_list* at *const_index* when a list is supplied, and the
       index itself when *const_list* is None.
    """
    value = const_index if const_list is None else const_list[const_index]
    return value, repr(value)
294
def _get_name_info(name_index, name_list):
    """Helper to get optional details about named references

       With a name list, the entry at *name_index* is returned as both the
       value and its description.  Without one (*name_list* is None), the
       index is returned together with its repr().
    """
    if name_list is None:
        return name_index, repr(name_index)
    name = name_list[name_index]
    return name, name
309
310
def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                      cells=None, linestarts=None, line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Yields one Instruction namedtuple per opcode.  The optional
    *varnames*, *names*, *constants* and *cells* sequences are used to
    resolve numeric arguments into the objects they refer to; when one is
    None the raw index is reported instead.  *linestarts* maps instruction
    offsets to source lines, and *line_offset* is added to every line
    number taken from it.

    """
    jump_targets = findlabels(code)
    line = None
    for offset, op, arg in _unpack_opargs(code):
        if linestarts is not None:
            line = linestarts.get(offset)
            if line is not None:
                line += line_offset
        value = None
        text = ''
        if arg is not None:
            # Default to the raw argument with no description, so callers
            # that pass no lookup tables still get the numeric index.
            value = arg
            if op in hasconst:
                value, text = _get_const_info(arg, constants)
            elif op in hasname:
                value, text = _get_name_info(arg, names)
            elif op in hasjrel:
                # Target is arg bytes past the next 2-byte instruction.
                value = offset + 2 + arg
                text = "to " + repr(value)
            elif op in haslocal:
                value, text = _get_name_info(arg, varnames)
            elif op in hascompare:
                value = text = cmp_op[arg]
            elif op in hasfree:
                value, text = _get_name_info(arg, cells)
            elif op == FORMAT_VALUE:
                # Low two bits select the converter; bit 2 flags a format spec.
                converter, text = FORMAT_VALUE_CONVERTERS[arg & 0x3]
                has_spec = bool(arg & 0x4)
                value = (converter, has_spec)
                if has_spec:
                    if text:
                        text += ', '
                    text += 'with format'
            elif op == MAKE_FUNCTION:
                # One flag name per set bit, in MAKE_FUNCTION_FLAGS order.
                text = ', '.join(
                    flag_name
                    for bit, flag_name in enumerate(MAKE_FUNCTION_FLAGS)
                    if arg & (1 << bit))
        yield Instruction(opname[op], op,
                          arg, value, text,
                          offset, line, offset in jump_targets)
364
def disassemble(co, lasti=-1, *, file=None):
    """Disassemble a code object.

    The listing is printed to *file*; the instruction whose offset equals
    *lasti* is marked as the current one.
    """
    _disassemble_bytes(
        co.co_code,
        lasti,
        co.co_varnames,
        co.co_names,
        co.co_consts,
        co.co_cellvars + co.co_freevars,
        dict(findlinestarts(co)),
        file=file,
    )
371
def _disassemble_recursive(co, *, file=None, depth=None):
    """Disassemble *co*, then the code objects found among its constants.

    *depth* limits the recursion: None means unlimited, otherwise nested
    code objects are followed only while the remaining depth is above zero.
    """
    disassemble(co, file=file)
    if depth is not None:
        if not depth > 0:
            return
        depth = depth - 1
    for const in co.co_consts:
        if not hasattr(const, 'co_code'):
            continue
        print(file=file)
        print("Disassembly of %r:" % (const,), file=file)
        _disassemble_recursive(const, file=file, depth=depth)
382
def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
                       constants=None, cells=None, linestarts=None,
                       *, file=None, line_offset=0):
    """Print a disassembly listing of the raw bytecode *code* to *file*.

    *lasti* is the offset of the instruction to mark as current (the
    default -1 matches no instruction).  *varnames*, *names*, *constants*
    and *cells* are optional lookup tables used to resolve instruction
    arguments.  *linestarts* maps instruction offsets to source lines and
    *line_offset* is added to each of those line numbers.

    The line number column is omitted when *linestarts* is None or empty.
    """
    # Omit the line number column entirely if we have no line number info.
    # An empty mapping counts as "no info": max() over no values would
    # raise ValueError.
    show_lineno = bool(linestarts)
    if show_lineno:
        maxlineno = max(linestarts.values()) + line_offset
        if maxlineno >= 1000:
            lineno_width = len(str(maxlineno))
        else:
            lineno_width = 3
    else:
        lineno_width = 0
    # Widen the offset column only when the largest offset needs it.
    maxoffset = len(code) - 2
    if maxoffset >= 10000:
        offset_width = len(str(maxoffset))
    else:
        offset_width = 4
    for instr in _get_instructions_bytes(code, varnames, names,
                                         constants, cells, linestarts,
                                         line_offset=line_offset):
        # Separate source lines with a blank line, except before the first.
        new_source_line = (show_lineno and
                           instr.starts_line is not None and
                           instr.offset > 0)
        if new_source_line:
            print(file=file)
        is_current_instr = instr.offset == lasti
        print(instr._disassemble(lineno_width, is_current_instr, offset_width),
              file=file)
412
def _disassemble_str(source, **kwargs):
    """Compile the source string, then disassemble the code object.

    Keyword arguments are forwarded unchanged to the recursive disassembly.
    """
    code = _try_compile(source, '<dis>')
    _disassemble_recursive(code, **kwargs)
416
417disco = disassemble                     # XXX For backwards compatibility
418
def _unpack_opargs(code):
    """Yield (offset, opcode, arg) for each 2-byte instruction in *code*.

    *arg* is None for opcodes below HAVE_ARGUMENT.  The argument of an
    EXTENDED_ARG instruction is shifted left by eight bits and OR-ed into
    the argument of the next instruction that takes one.
    """
    carried = 0
    for pos in range(0, len(code), 2):
        op = code[pos]
        if op < HAVE_ARGUMENT:
            arg = None
        else:
            arg = code[pos + 1] | carried
            # Only EXTENDED_ARG carries bits into the following argument.
            carried = (arg << 8) if op == EXTENDED_ARG else 0
        yield (pos, op, arg)
429
def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.

    Return the list of offsets, each listed once, in the order in which
    they are first encountered.

    """
    # A dict keeps first-insertion order and gives constant-time duplicate
    # detection, avoiding a linear scan of the result for every jump.
    targets = {}
    for offset, op, arg in _unpack_opargs(code):
        if arg is None:
            continue
        if op in hasjrel:
            # Relative target: arg bytes past the next 2-byte instruction.
            targets[offset + 2 + arg] = None
        elif op in hasjabs:
            # Absolute target: the argument is the offset itself.
            targets[arg] = None
    return list(targets)
448
def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.

    Generate pairs (offset, lineno) as described in Python/compile.c.

    co_lnotab is read as consecutive (byte increment, line increment)
    pairs, starting from offset 0 and co_firstlineno.

    """
    table = iter(code.co_lnotab)
    code_size = len(code.co_code)

    reported_line = None
    current_line = code.co_firstlineno
    offset = 0
    # zip() over one shared iterator walks the table two bytes at a time
    # and drops an unpaired trailing byte.
    for addr_step, line_step in zip(table, table):
        if addr_step:
            if current_line != reported_line:
                yield (offset, current_line)
                reported_line = current_line
            offset += addr_step
            if offset >= code_size:
                # The rest of the lnotab byte offsets are past the end of
                # the bytecode, so the lines were optimized away.
                return
        # Line increments are stored as 8-bit signed integers.
        if line_step >= 0x80:
            line_step -= 0x100
        current_line += line_step
    if current_line != reported_line:
        yield (offset, current_line)
478
class Bytecode:
    """The bytecode operations of a piece of code

    Instantiate this with a function, method, other compiled object, string of
    code, or a code object (as returned by compile()).

    Iterating over this yields the bytecode operations as Instruction instances.
    """
    def __init__(self, x, *, first_line=None, current_offset=None):
        code = _get_code_object(x)
        self.codeobj = code
        if first_line is not None:
            # Report line numbers as if the code started at first_line.
            self.first_line = first_line
            self._line_offset = first_line - code.co_firstlineno
        else:
            self.first_line = code.co_firstlineno
            self._line_offset = 0
        self._cell_names = code.co_cellvars + code.co_freevars
        self._linestarts = dict(findlinestarts(code))
        self._original_object = x
        self.current_offset = current_offset

    def __iter__(self):
        code = self.codeobj
        return _get_instructions_bytes(
            code.co_code,
            code.co_varnames,
            code.co_names,
            code.co_consts,
            self._cell_names,
            self._linestarts,
            line_offset=self._line_offset,
        )

    def __repr__(self):
        return f"{self.__class__.__name__}({self._original_object!r})"

    @classmethod
    def from_traceback(cls, tb):
        """ Construct a Bytecode from the given traceback """
        # Follow the chain to the last traceback entry.
        last = tb
        while last.tb_next:
            last = last.tb_next
        return cls(last.tb_frame.f_code, current_offset=last.tb_lasti)

    def info(self):
        """Return formatted information about the code object."""
        return _format_code_info(self.codeobj)

    def dis(self):
        """Return a formatted view of the bytecode operations."""
        code = self.codeobj
        # -1 matches no instruction offset, so nothing is marked as current.
        lasti = -1 if self.current_offset is None else self.current_offset
        with io.StringIO() as buffer:
            _disassemble_bytes(
                code.co_code,
                lasti=lasti,
                varnames=code.co_varnames,
                names=code.co_names,
                constants=code.co_consts,
                cells=self._cell_names,
                linestarts=self._linestarts,
                line_offset=self._line_offset,
                file=buffer,
            )
            return buffer.getvalue()
538
539
def _test():
    """Simple test program to disassemble a file.

    Reads the file named on the command line (standard input by default),
    compiles it in 'exec' mode and prints its disassembly.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('infile', type=argparse.FileType('rb'), nargs='?', default='-')
    infile = parser.parse_args().infile
    with infile:
        source = infile.read()
    dis(compile(source, infile.name, "exec"))
551
552if __name__ == "__main__":
553    _test()
554