• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Disassembler of Python byte code into mnemonics."""
2
3import sys
4import types
5import collections
6import io
7
8from opcode import *
9from opcode import __all__ as _opcodes_all
10
# Public API: the disassembly helpers defined in this module plus every
# name listed in the opcode module's own __all__.
__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
           "findlinestarts", "findlabels", "show_code",
           "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
del _opcodes_all

# Types that dis() will try to disassemble when it walks the __dict__ of a
# class or module.
_have_code = (types.MethodType, types.FunctionType, types.CodeType,
              classmethod, staticmethod, type)

# Numeric opcode of FORMAT_VALUE; _get_instructions_bytes() decodes its
# argument specially.
FORMAT_VALUE = opmap['FORMAT_VALUE']
20
def _try_compile(source, name):
    """Compile *source*, preferring expression mode over statement mode.

       The source is first compiled in 'eval' mode; if that raises a
       SyntaxError it is compiled again in 'exec' mode.

       This lets functions that normally expect code objects accept
       plain source strings as well.
    """
    try:
        return compile(source, name, 'eval')
    except SyntaxError:
        # Not a single expression: fall back to compiling it as statements.
        return compile(source, name, 'exec')
33
def dis(x=None, *, file=None, depth=None):
    """Disassemble classes, methods, functions, and other compiled objects.

    When called without an argument, the last traceback is disassembled.

    Generator objects, async generator objects and coroutine objects are
    accepted too: each keeps its code object in a dedicated attribute,
    which is looked up here.  Raw bytecode (bytes/bytearray) and strings of
    source code are also handled.

    Raises TypeError for objects that cannot be disassembled.
    """
    if x is None:
        distb(file=file)
        return
    # A method wraps the underlying function in __func__.
    if hasattr(x, '__func__'):
        x = x.__func__
    # Unwrap the code object from a function, generator, asynchronous
    # generator or coroutine; only the first matching attribute is used.
    for attr in ('__code__', 'gi_code', 'ag_code', 'cr_code'):
        if hasattr(x, attr):
            x = getattr(x, attr)
            break
    # Perform the disassembly.
    if hasattr(x, '__dict__'):
        # Class or module: disassemble each code-bearing member by name.
        for name, member in sorted(x.__dict__.items()):
            if not isinstance(member, _have_code):
                continue
            print("Disassembly of %s:" % name, file=file)
            try:
                dis(member, file=file, depth=depth)
            except TypeError as msg:
                print("Sorry:", msg, file=file)
            print(file=file)
        return
    if hasattr(x, 'co_code'):
        # Code object
        _disassemble_recursive(x, file=file, depth=depth)
        return
    if isinstance(x, (bytes, bytearray)):
        # Raw bytecode
        _disassemble_bytes(x, file=file)
        return
    if isinstance(x, str):
        # Source code
        _disassemble_str(x, file=file, depth=depth)
        return
    raise TypeError("don't know how to disassemble %s objects" %
                    type(x).__name__)
78
def distb(tb=None, *, file=None):
    """Disassemble a traceback (default: last traceback).

    When *tb* is omitted, sys.last_traceback is used and followed to its
    last entry; RuntimeError is raised if no such traceback exists.
    """
    target = tb
    if target is None:
        try:
            target = sys.last_traceback
        except AttributeError:
            raise RuntimeError("no last traceback to disassemble") from None
        # Walk down the chain to the final traceback entry.
        while target.tb_next:
            target = target.tb_next
    code = target.tb_frame.f_code
    disassemble(code, target.tb_lasti, file=file)
88
# The inspect module interrogates this dictionary to build its
# list of CO_* constants. It is also used by pretty_flags to
# turn the co_flags field into a human readable list.
# Each key is a single-bit mask; the value is that flag's display name.
COMPILER_FLAG_NAMES = {
     1: "OPTIMIZED",
     2: "NEWLOCALS",
     4: "VARARGS",
     8: "VARKEYWORDS",
    16: "NESTED",
    32: "GENERATOR",
    64: "NOFREE",
   128: "COROUTINE",
   256: "ITERABLE_COROUTINE",
   512: "ASYNC_GENERATOR",
}
104
def pretty_flags(flags):
    """Return pretty representation of code flags.

    The 32 low bits are examined from least to most significant.  Each set
    bit contributes its name from COMPILER_FLAG_NAMES, or its hex value
    when it has no name.  If the scan finishes without clearing every bit
    (including when *flags* is 0), the hex value of what is left is
    appended.  Entries are joined with ", ".
    """
    labels = []
    remaining = flags
    for bit in range(32):
        mask = 1 << bit
        if not remaining & mask:
            continue
        labels.append(COMPILER_FLAG_NAMES.get(mask, hex(mask)))
        remaining ^= mask
        if not remaining:
            # Every set bit has been named; nothing left to report.
            break
    else:
        labels.append(hex(remaining))
    return ", ".join(labels)
118
def _get_code_object(x):
    """Return the code object behind a method, function, generator-like
    object, code object or source string.

    Raises TypeError when no code object can be obtained from *x*.
    """
    # A method wraps the underlying function in __func__.
    if hasattr(x, '__func__'):
        x = x.__func__
    # Unwrap the code object from a function, generator, asynchronous
    # generator or coroutine; only the first matching attribute is used.
    for attr in ('__code__', 'gi_code', 'ag_code', 'cr_code'):
        if hasattr(x, attr):
            x = getattr(x, attr)
            break
    # Source code is compiled on the fly.
    if isinstance(x, str):
        x = _try_compile(x, "<disassembly>")
    # Anything without co_code at this point cannot be disassembled.
    if not hasattr(x, 'co_code'):
        raise TypeError("don't know how to disassemble %s objects" %
                        type(x).__name__)
    return x
141
def code_info(x):
    """Return formatted details of methods, functions, or code as a string."""
    code = _get_code_object(x)
    return _format_code_info(code)
145
def _format_code_info(co):
    """Build the multi-line text summary of the code object *co*.

    The fixed header fields come first.  Each non-empty table (constants,
    names, variable names, free variables, cell variables) follows as a
    title line plus one indexed line per entry.
    """
    summary = [
        "Name:              %s" % co.co_name,
        "Filename:          %s" % co.co_filename,
        "Argument count:    %s" % co.co_argcount,
        "Kw-only arguments: %s" % co.co_kwonlyargcount,
        "Number of locals:  %s" % co.co_nlocals,
        "Stack size:        %s" % co.co_stacksize,
        "Flags:             %s" % pretty_flags(co.co_flags),
    ]
    # (title, attribute holding the table, per-entry format); constants are
    # shown with repr(), names with str().
    tables = (
        ("Constants:", "co_consts", "%4d: %r"),
        ("Names:", "co_names", "%4d: %s"),
        ("Variable names:", "co_varnames", "%4d: %s"),
        ("Free variables:", "co_freevars", "%4d: %s"),
        ("Cell variables:", "co_cellvars", "%4d: %s"),
    )
    for title, attr, entry_fmt in tables:
        entries = getattr(co, attr)
        if not entries:
            continue
        summary.append(title)
        summary.extend(entry_fmt % pair for pair in enumerate(entries))
    return "\n".join(summary)
176
def show_code(co, *, file=None):
    """Print details of methods, functions, or code to *file*.

    When *file* is not given, print() falls back to stdout.
    """
    details = code_info(co)
    print(details, file=file)
183
_Instruction = collections.namedtuple(
    "_Instruction",
    "opname opcode arg argval argrepr offset starts_line is_jump_target")

# Attach a docstring to each generated field descriptor.
for _field, _doc in (
        ("opname", "Human readable name for operation"),
        ("opcode", "Numeric code for operation"),
        ("arg", "Numeric argument to operation (if any), otherwise None"),
        ("argval", "Resolved arg value (if known), otherwise same as arg"),
        ("argrepr", "Human readable description of operation argument"),
        ("offset", "Start index of operation within bytecode sequence"),
        ("starts_line", "Line started by this opcode (if any), otherwise None"),
        ("is_jump_target", "True if other code jumps to here, otherwise False"),
):
    getattr(_Instruction, _field).__doc__ = _doc
del _field, _doc

# Column widths used when formatting an instruction line.
_OPNAME_WIDTH = 20
_OPARG_WIDTH = 5

class Instruction(_Instruction):
    """Details for a bytecode operation

       Defined fields:
         opname - human readable name for operation
         opcode - numeric code for operation
         arg - numeric argument to operation (if any), otherwise None
         argval - resolved arg value (if known), otherwise same as arg
         argrepr - human readable description of operation argument
         offset - start index of operation within bytecode sequence
         starts_line - line started by this opcode (if any), otherwise None
         is_jump_target - True if other code jumps to here, otherwise False
    """

    def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
        """Format instruction details for inclusion in disassembly output

        *lineno_width* sets the width of the line number field (0 omits it)
        *mark_as_current* inserts a '-->' marker arrow as part of the line
        *offset_width* sets the width of the instruction offset field

        Returns the columns joined by single spaces, with trailing
        whitespace removed.
        """
        columns = []
        # Source line number: blank when this instruction starts no line.
        if lineno_width:
            if self.starts_line is None:
                columns.append(' ' * lineno_width)
            else:
                columns.append("%*d" % (lineno_width, self.starts_line))
        # Current instruction indicator.
        columns.append('-->' if mark_as_current else '   ')
        # Jump target marker.
        columns.append('>>' if self.is_jump_target else '  ')
        # Instruction offset from start of code sequence.
        columns.append(repr(self.offset).rjust(offset_width))
        # Opcode name.
        columns.append(self.opname.ljust(_OPNAME_WIDTH))
        # Opcode argument, followed by its description when there is one.
        if self.arg is not None:
            columns.append(repr(self.arg).rjust(_OPARG_WIDTH))
            if self.argrepr:
                columns.append('(' + self.argrepr + ')')
        return ' '.join(columns).rstrip()
249
250
def get_instructions(x, *, first_line=None):
    """Iterator for the opcodes in methods, functions or code

    Produces Instruction named tuples, one per operation in the supplied
    code.

    When *first_line* is given, line numbers are shifted so that the code
    object's first line is reported as *first_line*.  Otherwise the line
    information (if any) is taken unchanged from the code object.
    """
    co = _get_code_object(x)
    cell_names = co.co_cellvars + co.co_freevars
    linestarts = dict(findlinestarts(co))
    # Amount to add to every recorded line number.
    line_offset = 0 if first_line is None else first_line - co.co_firstlineno
    return _get_instructions_bytes(
        co.co_code,
        co.co_varnames,
        co.co_names,
        co.co_consts,
        cell_names,
        linestarts,
        line_offset,
    )
272
def _get_const_info(const_index, const_list):
    """Helper to get optional details about const references

       With a constant list, returns the constant found at *const_index*
       together with its repr().
       Without one (None), returns the index itself and its repr().
    """
    if const_list is None:
        return const_index, repr(const_index)
    value = const_list[const_index]
    return value, repr(value)
284
def _get_name_info(name_index, name_list):
    """Helper to get optional details about named references

       With a name list, returns the entry found at *name_index* as both
       the value and its textual form.
       Without one (None), returns the index itself and its repr().
    """
    if name_list is None:
        return name_index, repr(name_index)
    resolved = name_list[name_index]
    return resolved, resolved
299
300
def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                      cells=None, linestarts=None, line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.  Additional information about the code's runtime environment
    (e.g. variable names, constants) can be specified using optional
    arguments.

    *code* is the raw bytecode sequence.
    *varnames*, *names*, *constants* and *cells* are optional sequences used
    to resolve an instruction's argument into a value; when one is None the
    raw argument index is reported instead.
    *linestarts* is an optional mapping from instruction offset to source
    line; *line_offset* is added to every line number taken from it.

    """
    # findlabels() returns a list; turn it into a set once so the membership
    # test done for every instruction below is a constant-time lookup.
    labels = set(findlabels(code))
    starts_line = None
    for offset, op, arg in _unpack_opargs(code):
        if linestarts is not None:
            starts_line = linestarts.get(offset, None)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = offset in labels
        argval = None
        argrepr = ''
        if arg is not None:
            #  Set argval to the dereferenced value of the argument when
            #  available, and argrepr to the string representation of argval.
            #    _disassemble_bytes needs the string repr of the
            #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
            if op in hasconst:
                argval, argrepr = _get_const_info(arg, constants)
            elif op in hasname:
                argval, argrepr = _get_name_info(arg, names)
            elif op in hasjrel:
                # Relative jumps are measured from the next instruction,
                # which starts 2 bytes after this one.
                argval = offset + 2 + arg
                argrepr = "to " + repr(argval)
            elif op in haslocal:
                argval, argrepr = _get_name_info(arg, varnames)
            elif op in hascompare:
                argval = cmp_op[arg]
                argrepr = argval
            elif op in hasfree:
                argval, argrepr = _get_name_info(arg, cells)
            elif op == FORMAT_VALUE:
                # Low two bits select the conversion; bit 0x4 signals that a
                # format spec is present.
                argval = ((None, str, repr, ascii)[arg & 0x3], bool(arg & 0x4))
                argrepr = ('', 'str', 'repr', 'ascii')[arg & 0x3]
                if argval[1]:
                    if argrepr:
                        argrepr += ', '
                    argrepr += 'with format'
        yield Instruction(opname[op], op,
                          arg, argval, argrepr,
                          offset, starts_line, is_jump_target)
351
def disassemble(co, lasti=-1, *, file=None):
    """Disassemble a code object.

    The instruction at offset *lasti* is marked as the current one; output
    goes to *file*.
    """
    cell_names = co.co_cellvars + co.co_freevars
    linestarts = dict(findlinestarts(co))
    _disassemble_bytes(
        co.co_code,
        lasti,
        co.co_varnames,
        co.co_names,
        co.co_consts,
        cell_names,
        linestarts,
        file=file,
    )
358
def _disassemble_recursive(co, *, file=None, depth=None):
    """Disassemble *co*, then any code objects found among its constants.

    *depth* limits how many levels of nested code objects are followed;
    None means no limit.
    """
    disassemble(co, file=file)
    if depth is not None:
        if not depth > 0:
            # Recursion budget exhausted: do not descend further.
            return
        depth = depth - 1
    for const in co.co_consts:
        if not hasattr(const, 'co_code'):
            continue
        print(file=file)
        print("Disassembly of %r:" % (const,), file=file)
        _disassemble_recursive(const, file=file, depth=depth)
369
def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
                       constants=None, cells=None, linestarts=None,
                       *, file=None, line_offset=0):
    """Print a formatted disassembly of the bytecode *code* to *file*.

    *lasti* is the offset of the instruction to mark as current.
    *varnames*, *names*, *constants* and *cells* are passed through to
    _get_instructions_bytes() to resolve instruction arguments.
    *linestarts* maps instruction offsets to source lines; when it is None
    or empty the line number column is omitted.
    *line_offset* is added to every line number.
    """
    # Omit the line number column entirely if we have no line number info.
    # An empty mapping counts as "no info": max() below would raise
    # ValueError on it.
    show_lineno = bool(linestarts)
    if show_lineno:
        maxlineno = max(linestarts.values()) + line_offset
        if maxlineno >= 1000:
            lineno_width = len(str(maxlineno))
        else:
            lineno_width = 3
    else:
        lineno_width = 0
    # Offset of the last instruction (each instruction is 2 bytes wide).
    maxoffset = len(code) - 2
    if maxoffset >= 10000:
        offset_width = len(str(maxoffset))
    else:
        offset_width = 4
    for instr in _get_instructions_bytes(code, varnames, names,
                                         constants, cells, linestarts,
                                         line_offset=line_offset):
        # Separate source lines with a blank line, except before the first
        # instruction.
        new_source_line = (show_lineno and
                           instr.starts_line is not None and
                           instr.offset > 0)
        if new_source_line:
            print(file=file)
        is_current_instr = instr.offset == lasti
        print(instr._disassemble(lineno_width, is_current_instr, offset_width),
              file=file)
399
def _disassemble_str(source, **kwargs):
    """Compile the source string, then disassemble the code object.

    Keyword arguments are forwarded to _disassemble_recursive().
    """
    code = _try_compile(source, '<dis>')
    _disassemble_recursive(code, **kwargs)
403
disco = disassemble                     # Alias kept for backwards compatibility
405
def _unpack_opargs(code):
    """Yield (offset, opcode, argument) for each 2-byte unit of *code*.

    The argument is None for opcodes below HAVE_ARGUMENT.  The argument of
    an EXTENDED_ARG instruction is shifted left by 8 bits and OR-ed into
    the argument of the next argument-taking instruction.
    """
    carried = 0
    for offset in range(0, len(code), 2):
        op = code[offset]
        if op < HAVE_ARGUMENT:
            yield (offset, op, None)
            continue
        operand = code[offset + 1] | carried
        # Only EXTENDED_ARG passes high bits on to the next instruction.
        carried = (operand << 8) if op == EXTENDED_ARG else 0
        yield (offset, op, operand)
416
def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.

    Return the list of offsets, each listed once, in the order they are
    first encountered.

    """
    targets = []
    for offset, op, arg in _unpack_opargs(code):
        if arg is None:
            continue
        if op in hasjrel:
            # Relative jumps are measured from the next instruction.
            target = offset + 2 + arg
        elif op in hasjabs:
            target = arg
        else:
            continue
        if target not in targets:
            targets.append(target)
    return targets
435
def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.

    Generate pairs (offset, lineno) as described in Python/compile.c.

    The pairs are decoded from code.co_lnotab, read as alternating
    (bytecode increment, line increment) bytes, starting at
    code.co_firstlineno.

    """
    addr_steps = code.co_lnotab[0::2]
    line_steps = code.co_lnotab[1::2]

    reported_line = None
    current_line = code.co_firstlineno
    position = 0
    for addr_step, line_step in zip(addr_steps, line_steps):
        if addr_step:
            # The address is about to advance: report the line reached so
            # far, unless it was already reported.
            if current_line != reported_line:
                yield (position, current_line)
                reported_line = current_line
            position += addr_step
        # Line increments are stored as 8-bit signed integers.
        current_line += (line_step - 0x100) if line_step >= 0x80 else line_step
    if current_line != reported_line:
        yield (position, current_line)
460
class Bytecode:
    """The bytecode operations of a piece of code

    Create one from a function, method, other compiled object, string of
    code, or a code object (as returned by compile()).

    Iteration yields the bytecode operations as Instruction instances.
    """
    def __init__(self, x, *, first_line=None, current_offset=None):
        code = _get_code_object(x)
        self.codeobj = code
        if first_line is not None:
            # Report line numbers shifted so the code starts at first_line.
            self.first_line = first_line
            self._line_offset = first_line - code.co_firstlineno
        else:
            self.first_line = code.co_firstlineno
            self._line_offset = 0
        self._cell_names = code.co_cellvars + code.co_freevars
        self._linestarts = dict(findlinestarts(code))
        self._original_object = x
        self.current_offset = current_offset

    def __iter__(self):
        code = self.codeobj
        return _get_instructions_bytes(
            code.co_code,
            code.co_varnames,
            code.co_names,
            code.co_consts,
            self._cell_names,
            self._linestarts,
            line_offset=self._line_offset,
        )

    def __repr__(self):
        return f"{self.__class__.__name__}({self._original_object!r})"

    @classmethod
    def from_traceback(cls, tb):
        """ Construct a Bytecode from the given traceback """
        # Follow the chain to its last traceback entry.
        last = tb
        while last.tb_next:
            last = last.tb_next
        return cls(last.tb_frame.f_code, current_offset=last.tb_lasti)

    def info(self):
        """Return formatted information about the code object."""
        return _format_code_info(self.codeobj)

    def dis(self):
        """Return a formatted view of the bytecode operations."""
        code = self.codeobj
        # -1 matches no instruction offset, so nothing is marked as current.
        marked = -1 if self.current_offset is None else self.current_offset
        with io.StringIO() as buffer:
            _disassemble_bytes(code.co_code, varnames=code.co_varnames,
                               names=code.co_names, constants=code.co_consts,
                               cells=self._cell_names,
                               linestarts=self._linestarts,
                               line_offset=self._line_offset,
                               file=buffer,
                               lasti=marked)
            return buffer.getvalue()
520
521
def _test():
    """Simple test program to disassemble a file.

    Reads Python source from the file named on the command line (stdin by
    default), compiles it and prints its disassembly.
    """
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('infile', type=argparse.FileType(), nargs='?', default='-')
    options = arg_parser.parse_args()
    with options.infile as stream:
        text = stream.read()
    dis(compile(text, options.infile.name, "exec"))
533
534if __name__ == "__main__":
535    _test()
536