• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Disassembler of Python byte code into mnemonics."""
2
3import sys
4import types
5import collections
6import io
7
8from opcode import *
9from opcode import __all__ as _opcodes_all
10
11__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
12           "findlinestarts", "findlabels", "show_code",
13           "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
14del _opcodes_all
15
# Types that dis() treats as "has code" when it walks the __dict__ of a
# class or module: only members that are instances of one of these are
# disassembled.
_have_code = (types.MethodType, types.FunctionType, types.CodeType,
              classmethod, staticmethod, type)

# Numeric opcode of FORMAT_VALUE, looked up once so the instruction decoder
# can compare against it directly.
FORMAT_VALUE = opmap['FORMAT_VALUE']
# Indexed by the low two bits of a FORMAT_VALUE oparg.  Each entry is
# (conversion callable or None, name shown in the disassembly).
FORMAT_VALUE_CONVERTERS = (
    (None, ''),
    (str, 'str'),
    (repr, 'repr'),
    (ascii, 'ascii'),
)
# Numeric opcode of MAKE_FUNCTION.
MAKE_FUNCTION = opmap['MAKE_FUNCTION']
# Entry i names bit i of a MAKE_FUNCTION oparg; the decoder lists the names
# of all bits that are set.
MAKE_FUNCTION_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure')
28
29
30def _try_compile(source, name):
31    """Attempts to compile the given source, first as an expression and
32       then as a statement if the first approach fails.
33
34       Utility function to accept strings in functions that otherwise
35       expect code objects
36    """
37    try:
38        c = compile(source, name, 'eval')
39    except SyntaxError:
40        c = compile(source, name, 'exec')
41    return c
42
def dis(x=None, *, file=None, depth=None):
    """Disassemble classes, methods, functions, and other compiled objects.

    Called without an argument, the last traceback is disassembled.

    Generator objects, async generator objects and coroutine objects are
    accepted too; each keeps its code object in a dedicated attribute
    which is unwrapped here.  Objects with a __dict__ (classes, modules)
    have every code-bearing member disassembled in name order.  Raw
    bytecode (bytes/bytearray) and source strings are also supported.

    Raises TypeError for anything else.
    """
    if x is None:
        distb(file=file)
        return

    # Methods wrap the underlying function.
    x = getattr(x, '__func__', x)
    # Functions, generators, async generators and coroutines each expose
    # their code object under a different attribute; take the first match.
    for code_attr in ('__code__', 'gi_code', 'ag_code', 'cr_code'):
        if hasattr(x, code_attr):
            x = getattr(x, code_attr)
            break

    if hasattr(x, '__dict__'):  # Class or module
        for name, member in sorted(x.__dict__.items()):
            if not isinstance(member, _have_code):
                continue
            print("Disassembly of %s:" % name, file=file)
            try:
                dis(member, file=file, depth=depth)
            except TypeError as msg:
                # Report and carry on with the remaining members.
                print("Sorry:", msg, file=file)
            print(file=file)
        return

    if hasattr(x, 'co_code'):  # Code object
        _disassemble_recursive(x, file=file, depth=depth)
        return
    if isinstance(x, (bytes, bytearray)):  # Raw bytecode
        _disassemble_bytes(x, file=file)
        return
    if isinstance(x, str):  # Source code
        _disassemble_str(x, file=file, depth=depth)
        return

    raise TypeError("don't know how to disassemble %s objects" %
                    type(x).__name__)
87
def distb(tb=None, *, file=None):
    """Disassemble a traceback (default: last traceback).

    When *tb* is omitted, sys.last_traceback is used and followed to its
    innermost entry; RuntimeError is raised if no such traceback exists.
    An explicitly supplied *tb* is disassembled as given.
    """
    if tb is None:
        try:
            innermost = sys.last_traceback
        except AttributeError:
            raise RuntimeError("no last traceback to disassemble") from None
        # Walk down to the frame where the exception was actually raised.
        while innermost.tb_next:
            innermost = innermost.tb_next
        tb = innermost
    disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file)
97
# The inspect module interrogates this dictionary to build its
# list of CO_* constants. It is also used by pretty_flags to
# turn the co_flags field into a human readable list.
COMPILER_FLAG_NAMES = {
    0x001: "OPTIMIZED",
    0x002: "NEWLOCALS",
    0x004: "VARARGS",
    0x008: "VARKEYWORDS",
    0x010: "NESTED",
    0x020: "GENERATOR",
    0x040: "NOFREE",
    0x080: "COROUTINE",
    0x100: "ITERABLE_COROUTINE",
    0x200: "ASYNC_GENERATOR",
}

def pretty_flags(flags):
    """Return pretty representation of code flags.

    The low 32 bits are examined from least to most significant.  Each
    set bit contributes its name from COMPILER_FLAG_NAMES, or its hex
    value when it has no name.  If the scan finishes without having
    consumed every set bit (which includes the case flags == 0), the hex
    value of what is left is appended.  Names are joined with ", ".
    """
    labels = []
    remaining = flags
    consumed_all = False
    bit = 1
    limit = 1 << 32
    while bit < limit:
        if remaining & bit:
            labels.append(COMPILER_FLAG_NAMES.get(bit, hex(bit)))
            remaining ^= bit
            if not remaining:
                # Every set bit has been reported; nothing left over.
                consumed_all = True
                break
        bit <<= 1
    if not consumed_all:
        labels.append(hex(remaining))
    return ", ".join(labels)
127
128def _get_code_object(x):
129    """Helper to handle methods, compiled or raw code objects, and strings."""
130    # Extract functions from methods.
131    if hasattr(x, '__func__'):
132        x = x.__func__
133    # Extract compiled code objects from...
134    if hasattr(x, '__code__'):  # ...a function, or
135        x = x.__code__
136    elif hasattr(x, 'gi_code'):  #...a generator object, or
137        x = x.gi_code
138    elif hasattr(x, 'ag_code'):  #...an asynchronous generator object, or
139        x = x.ag_code
140    elif hasattr(x, 'cr_code'):  #...a coroutine.
141        x = x.cr_code
142    # Handle source code.
143    if isinstance(x, str):
144        x = _try_compile(x, "<disassembly>")
145    # By now, if we don't have a code object, we can't disassemble x.
146    if hasattr(x, 'co_code'):
147        return x
148    raise TypeError("don't know how to disassemble %s objects" %
149                    type(x).__name__)
150
def code_info(x):
    """Return a formatted, multi-line summary of a method, function, or code.

    *x* is first resolved to a code object; TypeError propagates if that
    is not possible.
    """
    code_obj = _get_code_object(x)
    return _format_code_info(code_obj)
154
def _format_code_info(co):
    """Build the multi-line textual summary of code object *co*.

    The summary starts with fixed scalar fields (name, filename, argument
    counts, locals, stack size, flags) and is followed by one indexed
    listing for each non-empty tuple among constants, names, variable
    names, free variables and cell variables.  Returns a single string
    with lines joined by newlines.
    """
    scalar_fields = (
        ("Name:              %s", co.co_name),
        ("Filename:          %s", co.co_filename),
        ("Argument count:    %s", co.co_argcount),
        ("Positional-only arguments: %s", co.co_posonlyargcount),
        ("Kw-only arguments: %s", co.co_kwonlyargcount),
        ("Number of locals:  %s", co.co_nlocals),
        ("Stack size:        %s", co.co_stacksize),
        ("Flags:             %s", pretty_flags(co.co_flags)),
    )
    lines = [template % value for template, value in scalar_fields]

    # Constants are shown with repr(); the name tables hold plain strings.
    listings = (
        ("Constants:", co.co_consts, "%4d: %r"),
        ("Names:", co.co_names, "%4d: %s"),
        ("Variable names:", co.co_varnames, "%4d: %s"),
        ("Free variables:", co.co_freevars, "%4d: %s"),
        ("Cell variables:", co.co_cellvars, "%4d: %s"),
    )
    for heading, entries, template in listings:
        if not entries:
            continue
        lines.append(heading)
        lines.extend(template % pair for pair in enumerate(entries))
    return "\n".join(lines)
186
def show_code(co, *, file=None):
    """Print details of methods, functions, or code to *file*.

    The text is the same as returned by code_info().  When *file* is not
    given, print()'s default destination (stdout) is used.
    """
    summary = code_info(co)
    print(summary, file=file)
193
_Instruction = collections.namedtuple(
    "_Instruction",
    ["opname", "opcode", "arg", "argval", "argrepr",
     "offset", "starts_line", "is_jump_target"],
)

# Attach a docstring to every field of the named tuple.
for _field_name, _field_doc in (
    ("opname", "Human readable name for operation"),
    ("opcode", "Numeric code for operation"),
    ("arg", "Numeric argument to operation (if any), otherwise None"),
    ("argval", "Resolved arg value (if known), otherwise same as arg"),
    ("argrepr", "Human readable description of operation argument"),
    ("offset", "Start index of operation within bytecode sequence"),
    ("starts_line", "Line started by this opcode (if any), otherwise None"),
    ("is_jump_target", "True if other code jumps to here, otherwise False"),
):
    getattr(_Instruction, _field_name).__doc__ = _field_doc
del _field_name, _field_doc

# Column widths used when rendering an instruction as text.
_OPNAME_WIDTH = 20
_OPARG_WIDTH = 5

class Instruction(_Instruction):
    """Details for a bytecode operation

       Defined fields:
         opname - human readable name for operation
         opcode - numeric code for operation
         arg - numeric argument to operation (if any), otherwise None
         argval - resolved arg value (if known), otherwise same as arg
         argrepr - human readable description of operation argument
         offset - start index of operation within bytecode sequence
         starts_line - line started by this opcode (if any), otherwise None
         is_jump_target - True if other code jumps to here, otherwise False
    """

    def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
        """Render this instruction as one line of disassembly output

        *lineno_width* is the width of the line number column; 0 drops
        the column altogether
        *mark_as_current* puts a '-->' arrow in the marker column
        *offset_width* is the width of the instruction offset column

        Columns are separated by single spaces and trailing whitespace
        is stripped from the result.
        """
        columns = []
        # Source line number (blank when this opcode does not start a line)
        if lineno_width:
            if self.starts_line is None:
                columns.append(' ' * lineno_width)
            else:
                columns.append(("%%%dd" % lineno_width) % self.starts_line)
        # Current-instruction arrow
        columns.append('-->' if mark_as_current else '   ')
        # Jump-target marker
        columns.append('>>' if self.is_jump_target else '  ')
        # Offset from the start of the code sequence, then the opcode name
        columns.append(repr(self.offset).rjust(offset_width))
        columns.append(self.opname.ljust(_OPNAME_WIDTH))
        # Raw argument, followed by its description when there is one
        if self.arg is not None:
            columns.append(repr(self.arg).rjust(_OPARG_WIDTH))
            if self.argrepr:
                columns.append('(' + self.argrepr + ')')
        return ' '.join(columns).rstrip()
259
260
def get_instructions(x, *, first_line=None):
    """Iterator for the opcodes in methods, functions or code

    Yields one Instruction named tuple per operation in the supplied
    code, in order.

    When *first_line* is given, reported line numbers are shifted so that
    the code's first source line is reported as *first_line*.  When it is
    None, line numbers are taken unchanged from the code object.
    """
    co = _get_code_object(x)
    if first_line is None:
        shift = 0
    else:
        shift = first_line - co.co_firstlineno
    return _get_instructions_bytes(
        co.co_code,
        co.co_varnames,
        co.co_names,
        co.co_consts,
        co.co_cellvars + co.co_freevars,
        dict(findlinestarts(co)),
        shift,
    )
282
283def _get_const_info(const_index, const_list):
284    """Helper to get optional details about const references
285
286       Returns the dereferenced constant and its repr if the constant
287       list is defined.
288       Otherwise returns the constant index and its repr().
289    """
290    argval = const_index
291    if const_list is not None:
292        argval = const_list[const_index]
293    return argval, repr(argval)
294
295def _get_name_info(name_index, name_list):
296    """Helper to get optional details about named references
297
298       Returns the dereferenced name as both value and repr if the name
299       list is defined.
300       Otherwise returns the name index and its repr().
301    """
302    argval = name_index
303    if name_list is not None:
304        argval = name_list[name_index]
305        argrepr = argval
306    else:
307        argrepr = repr(argval)
308    return argval, argrepr
309
310
def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                      cells=None, linestarts=None, line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.  Additional information about the code's runtime environment
    (e.g. variable names, constants) can be specified using optional
    arguments.

    *code* is the raw bytecode.  *varnames*, *names*, *constants* and
    *cells* are the tables used to resolve arguments; when a table is
    None the raw index is reported instead.  *linestarts* maps bytecode
    offsets to source line numbers, and *line_offset* is added to every
    reported line number.
    """
    # findlabels() returns a list; membership is tested once per
    # instruction below, so use a set to keep each test constant-time.
    labels = set(findlabels(code))
    starts_line = None
    for offset, op, arg in _unpack_opargs(code):
        if linestarts is not None:
            starts_line = linestarts.get(offset, None)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = offset in labels
        argval = None
        argrepr = ''
        if arg is not None:
            #  Set argval to the dereferenced value of the argument when
            #  available, and argrepr to the string representation of argval.
            #    _disassemble_bytes needs the string repr of the
            #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
            if op in hasconst:
                argval, argrepr = _get_const_info(arg, constants)
            elif op in hasname:
                argval, argrepr = _get_name_info(arg, names)
            elif op in hasjabs:
                # Absolute jump: the argument is doubled to get the
                # target byte offset.
                argval = arg*2
                argrepr = "to " + repr(argval)
            elif op in hasjrel:
                # Relative jump: the doubled argument is measured from the
                # offset following this 2-byte instruction.
                argval = offset + 2 + arg*2
                argrepr = "to " + repr(argval)
            elif op in haslocal:
                argval, argrepr = _get_name_info(arg, varnames)
            elif op in hascompare:
                argval = cmp_op[arg]
                argrepr = argval
            elif op in hasfree:
                argval, argrepr = _get_name_info(arg, cells)
            elif op == FORMAT_VALUE:
                # Low two bits select the conversion; bit 0x4 is reported
                # as "with format".
                argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x3]
                argval = (argval, bool(arg & 0x4))
                if argval[1]:
                    if argrepr:
                        argrepr += ', '
                    argrepr += 'with format'
            elif op == MAKE_FUNCTION:
                # One name per set bit of the argument.
                argrepr = ', '.join(s for i, s in enumerate(MAKE_FUNCTION_FLAGS)
                                    if arg & (1<<i))
        yield Instruction(opname[op], op,
                          arg, argval, argrepr,
                          offset, starts_line, is_jump_target)
367
def disassemble(co, lasti=-1, *, file=None):
    """Disassemble a code object.

    *lasti* is the offset of the instruction to flag as current; the
    default of -1 matches no instruction.  Output is printed to *file*.
    """
    line_map = dict(findlinestarts(co))
    _disassemble_bytes(
        co.co_code,
        lasti,
        co.co_varnames,
        co.co_names,
        co.co_consts,
        co.co_cellvars + co.co_freevars,
        line_map,
        file=file,
    )
374
def _disassemble_recursive(co, *, file=None, depth=None):
    """Disassemble *co*, then every code object found among its constants.

    *depth* limits how many levels of nested code objects are followed;
    None means no limit, and a value that is not greater than zero stops
    after *co* itself.
    """
    disassemble(co, file=file)
    if depth is not None:
        if not depth > 0:
            return
        depth = depth - 1
    for const in co.co_consts:
        if not hasattr(const, 'co_code'):
            continue
        print(file=file)
        print("Disassembly of %r:" % (const,), file=file)
        _disassemble_recursive(const, file=file, depth=depth)
385
def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
                       constants=None, cells=None, linestarts=None,
                       *, file=None, line_offset=0):
    """Print a disassembly of raw bytecode *code* to *file*.

    The instruction whose offset equals *lasti* is flagged as current.
    The remaining arguments are passed on to the instruction decoder.
    A blank line is printed before each instruction that starts a new
    source line, except for one at offset 0.
    """
    # Without line number info the line number column is dropped entirely.
    show_lineno = bool(linestarts)
    if not show_lineno:
        lineno_width = 0
    else:
        highest_line = max(linestarts.values()) + line_offset
        lineno_width = len(str(highest_line)) if highest_line >= 1000 else 3

    # Offset of the last 2-byte instruction slot.
    highest_offset = len(code) - 2
    offset_width = len(str(highest_offset)) if highest_offset >= 10000 else 4

    instructions = _get_instructions_bytes(code, varnames, names,
                                           constants, cells, linestarts,
                                           line_offset=line_offset)
    for instr in instructions:
        if show_lineno and instr.starts_line is not None and instr.offset > 0:
            print(file=file)
        text = instr._disassemble(lineno_width, instr.offset == lasti,
                                  offset_width)
        print(text, file=file)
415
def _disassemble_str(source, **kwargs):
    """Compile *source* and recursively disassemble the result.

    Keyword arguments are forwarded unchanged to _disassemble_recursive.
    """
    compiled = _try_compile(source, '<dis>')
    _disassemble_recursive(compiled, **kwargs)

disco = disassemble                     # XXX For backwards compatibility
421
422def _unpack_opargs(code):
423    extended_arg = 0
424    for i in range(0, len(code), 2):
425        op = code[i]
426        if op >= HAVE_ARGUMENT:
427            arg = code[i+1] | extended_arg
428            extended_arg = (arg << 8) if op == EXTENDED_ARG else 0
429        else:
430            arg = None
431            extended_arg = 0
432        yield (i, op, arg)
433
def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.

    Return the list of target offsets, each listed once, in the order in
    which they are first encountered.

    """
    targets = []
    for offset, op, arg in _unpack_opargs(code):
        if arg is None:
            continue
        if op in hasjrel:
            # Relative jumps are measured from the following instruction.
            target = offset + 2 + arg*2
        elif op in hasjabs:
            target = arg*2
        else:
            continue
        if target not in targets:
            targets.append(target)
    return targets
452
453def findlinestarts(code):
454    """Find the offsets in a byte code which are start of lines in the source.
455
456    Generate pairs (offset, lineno)
457    """
458    lastline = None
459    for start, end, line in code.co_lines():
460        if line is not None and line != lastline:
461            lastline = line
462            yield start, line
463    return
464
465
class Bytecode:
    """The bytecode operations of a piece of code

    Create an instance from a function, method, other compiled object,
    string of code, or a code object (as returned by compile()).

    Iterating over an instance yields its bytecode operations as
    Instruction instances.
    """
    def __init__(self, x, *, first_line=None, current_offset=None):
        code = _get_code_object(x)
        self.codeobj = code
        # With no explicit first line, report line numbers unshifted.
        if first_line is None:
            first_line = code.co_firstlineno
        self.first_line = first_line
        self._line_offset = first_line - code.co_firstlineno
        self._cell_names = code.co_cellvars + code.co_freevars
        self._linestarts = dict(findlinestarts(code))
        self._original_object = x
        self.current_offset = current_offset

    def __iter__(self):
        code = self.codeobj
        return _get_instructions_bytes(code.co_code, code.co_varnames,
                                       code.co_names, code.co_consts,
                                       self._cell_names, self._linestarts,
                                       line_offset=self._line_offset)

    def __repr__(self):
        return f"{self.__class__.__name__}({self._original_object!r})"

    @classmethod
    def from_traceback(cls, tb):
        """ Construct a Bytecode from the given traceback """
        # Use the innermost entry of the traceback chain.
        innermost = tb
        while innermost.tb_next:
            innermost = innermost.tb_next
        return cls(innermost.tb_frame.f_code,
                   current_offset=innermost.tb_lasti)

    def info(self):
        """Return formatted information about the code object."""
        return _format_code_info(self.codeobj)

    def dis(self):
        """Return a formatted view of the bytecode operations."""
        code = self.codeobj
        # -1 matches no instruction, so nothing is flagged as current.
        lasti = -1 if self.current_offset is None else self.current_offset
        with io.StringIO() as buffer:
            _disassemble_bytes(code.co_code,
                               lasti=lasti,
                               varnames=code.co_varnames,
                               names=code.co_names,
                               constants=code.co_consts,
                               cells=self._cell_names,
                               linestarts=self._linestarts,
                               line_offset=self._line_offset,
                               file=buffer)
            return buffer.getvalue()
525
526
def _test():
    """Simple test program to disassemble a file.

    Takes one optional command-line argument naming the file to read;
    the default '-' is handled by argparse.FileType.  The file's content
    is compiled in 'exec' mode and passed to dis().
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('infile', type=argparse.FileType('rb'), nargs='?', default='-')
    options = cli.parse_args()
    with options.infile as stream:
        payload = stream.read()
    compiled = compile(payload, options.infile.name, "exec")
    dis(compiled)

if __name__ == "__main__":
    _test()
541