• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Disassembler of Python byte code into mnemonics."""
2
3import sys
4import types
5import collections
6import io
7
8from opcode import *
9from opcode import (
10    __all__ as _opcodes_all,
11    _cache_format,
12    _inline_cache_entries,
13    _nb_ops,
14    _intrinsic_1_descs,
15    _intrinsic_2_descs,
16    _specializations,
17    _specialized_opmap,
18)
19
20from _opcode import get_executor
21
# Public names of this module: the disassembly helpers listed here plus
# everything listed in the opcode module's own __all__.
__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
           "findlinestarts", "findlabels", "show_code",
           "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
# The alias was only needed to build __all__; drop it from the namespace.
del _opcodes_all
26
# Types that dis() disassembles when it finds them as values in the
# __dict__ of the object it was given.
_have_code = (types.MethodType, types.FunctionType, types.CodeType,
              classmethod, staticmethod, type)

# Opcode numbers that the argument resolver special-cases, looked up once
# at import time.
CONVERT_VALUE = opmap['CONVERT_VALUE']

SET_FUNCTION_ATTRIBUTE = opmap['SET_FUNCTION_ATTRIBUTE']
# Bit i of a SET_FUNCTION_ATTRIBUTE oparg selects FUNCTION_ATTR_FLAGS[i]
# when the argument description is built.
FUNCTION_ATTR_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure')

ENTER_EXECUTOR = opmap['ENTER_EXECUTOR']
LOAD_CONST = opmap['LOAD_CONST']
RETURN_CONST = opmap['RETURN_CONST']
LOAD_GLOBAL = opmap['LOAD_GLOBAL']
BINARY_OP = opmap['BINARY_OP']
JUMP_BACKWARD = opmap['JUMP_BACKWARD']
FOR_ITER = opmap['FOR_ITER']
SEND = opmap['SEND']
LOAD_ATTR = opmap['LOAD_ATTR']
LOAD_SUPER_ATTR = opmap['LOAD_SUPER_ATTR']
CALL_INTRINSIC_1 = opmap['CALL_INTRINSIC_1']
CALL_INTRINSIC_2 = opmap['CALL_INTRINSIC_2']
LOAD_FAST_LOAD_FAST = opmap['LOAD_FAST_LOAD_FAST']
STORE_FAST_LOAD_FAST = opmap['STORE_FAST_LOAD_FAST']
STORE_FAST_STORE_FAST = opmap['STORE_FAST_STORE_FAST']

# Opcode used for the synthetic CACHE lines printed by
# Formatter.print_instruction.
CACHE = opmap["CACHE"]
52
# Copies of opname/opmap that additionally contain the specialized
# instructions, so specialized opcodes can be translated to names and back.
_all_opname = list(opname)
_all_opmap = dict(opmap)
for name, op in _specialized_opmap.items():
    # fill opname and opmap
    assert op < len(_all_opname)
    _all_opname[op] = name
    _all_opmap[name] = op

# Maps the name of each specialized instruction to the name of the base
# instruction whose family it belongs to.
deoptmap = {
    specialized: base for base, family in _specializations.items() for specialized in family
}
64
65def _try_compile(source, name):
66    """Attempts to compile the given source, first as an expression and
67       then as a statement if the first approach fails.
68
69       Utility function to accept strings in functions that otherwise
70       expect code objects
71    """
72    try:
73        return compile(source, name, 'eval')
74    except SyntaxError:
75        pass
76    return compile(source, name, 'exec')
77
def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False,
        show_offsets=False):
    """Disassemble classes, methods, functions, and other compiled objects.

    With no argument, disassemble the last traceback.

    Compiled objects currently include generator objects, async generator
    objects, and coroutine objects, all of which store their code object
    in a special attribute.  Raw bytecode (bytes or bytearray) and source
    strings are accepted as well; any other object raises TypeError.
    """
    if x is None:
        distb(file=file, show_caches=show_caches, adaptive=adaptive,
              show_offsets=show_offsets)
        return

    # Options forwarded unchanged to every nested disassembly call.
    options = dict(file=file, depth=depth, show_caches=show_caches,
                   adaptive=adaptive, show_offsets=show_offsets)

    # Unwrap a method to its underlying function.
    if hasattr(x, '__func__'):
        x = x.__func__
    # Unwrap a function, generator, async generator or coroutine to its
    # code object; only the first attribute found is followed.
    for code_attr in ('__code__', 'gi_code', 'ag_code', 'cr_code'):
        if hasattr(x, code_attr):
            x = getattr(x, code_attr)
            break

    if hasattr(x, '__dict__'):  # Class or module
        for name, member in sorted(x.__dict__.items()):
            if not isinstance(member, _have_code):
                continue
            print("Disassembly of %s:" % name, file=file)
            try:
                dis(member, **options)
            except TypeError as msg:
                print("Sorry:", msg, file=file)
            print(file=file)
    elif hasattr(x, 'co_code'):  # Code object
        _disassemble_recursive(x, **options)
    elif isinstance(x, (bytes, bytearray)):  # Raw bytecode
        labels_map = _make_labels_map(x)
        if show_offsets:
            offset_width = len(str(max(len(x) - 2, 9999)))
        else:
            offset_width = 0
        formatter = Formatter(file=file,
                              offset_width=offset_width,
                              label_width=4 + len(str(len(labels_map))),
                              show_caches=show_caches)
        arg_resolver = ArgResolver(labels_map=labels_map)
        _disassemble_bytes(x, arg_resolver=arg_resolver, formatter=formatter)
    elif isinstance(x, str):  # Source code
        _disassemble_str(x, **options)
    else:
        raise TypeError("don't know how to disassemble %s objects" %
                        type(x).__name__)
131
def distb(tb=None, *, file=None, show_caches=False, adaptive=False, show_offsets=False):
    """Disassemble a traceback (default: last traceback).

    Raises RuntimeError when no traceback is given and the interpreter has
    not recorded a last exception.
    """
    if tb is None:
        try:
            tb = (sys.last_exc.__traceback__ if hasattr(sys, 'last_exc')
                  else sys.last_traceback)
        except AttributeError:
            raise RuntimeError("no last traceback to disassemble") from None
        # Follow the chain to its last entry.
        while tb.tb_next:
            tb = tb.tb_next
    disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file,
                show_caches=show_caches, adaptive=adaptive,
                show_offsets=show_offsets)
144
# The inspect module interrogates this dictionary to build its
# list of CO_* constants. It is also used by pretty_flags to
# turn the co_flags field into a human readable list.
COMPILER_FLAG_NAMES = {
     1: "OPTIMIZED",
     2: "NEWLOCALS",
     4: "VARARGS",
     8: "VARKEYWORDS",
    16: "NESTED",
    32: "GENERATOR",
    64: "NOFREE",
   128: "COROUTINE",
   256: "ITERABLE_COROUTINE",
   512: "ASYNC_GENERATOR",
}

def pretty_flags(flags):
    """Return pretty representation of code flags.

    The low 32 bits are examined from least to most significant; each set
    bit is rendered by name (or in hex when it has no name).  Whatever is
    left once those bits are exhausted -- including a *flags* of 0 -- is
    appended in hex.
    """
    labels = []
    remaining = flags
    for bit in (1 << shift for shift in range(32)):
        if not remaining & bit:
            continue
        labels.append(COMPILER_FLAG_NAMES.get(bit, hex(bit)))
        remaining ^= bit
        if not remaining:
            # Every set bit has been described; nothing left to report.
            break
    else:
        labels.append(hex(remaining))
    return ", ".join(labels)
174
# Type of the UNKNOWN sentinel; its only customisation is a readable repr.
class _Unknown:
    def __repr__(self):
        return "<unknown>"

# Sentinel to represent values that cannot be calculated
UNKNOWN = _Unknown()
181
182def _get_code_object(x):
183    """Helper to handle methods, compiled or raw code objects, and strings."""
184    # Extract functions from methods.
185    if hasattr(x, '__func__'):
186        x = x.__func__
187    # Extract compiled code objects from...
188    if hasattr(x, '__code__'):  # ...a function, or
189        x = x.__code__
190    elif hasattr(x, 'gi_code'):  #...a generator object, or
191        x = x.gi_code
192    elif hasattr(x, 'ag_code'):  #...an asynchronous generator object, or
193        x = x.ag_code
194    elif hasattr(x, 'cr_code'):  #...a coroutine.
195        x = x.cr_code
196    # Handle source code.
197    if isinstance(x, str):
198        x = _try_compile(x, "<disassembly>")
199    # By now, if we don't have a code object, we can't disassemble x.
200    if hasattr(x, 'co_code'):
201        return x
202    raise TypeError("don't know how to disassemble %s objects" %
203                    type(x).__name__)
204
def _deoptop(op):
    """Return the base opcode of *op*, or *op* itself if it is not specialized."""
    name = _all_opname[op]
    if name in deoptmap:
        return _all_opmap[deoptmap[name]]
    return op
208
209def _get_code_array(co, adaptive):
210    if adaptive:
211        code = co._co_code_adaptive
212        res = []
213        found = False
214        for i in range(0, len(code), 2):
215            op, arg = code[i], code[i+1]
216            if op == ENTER_EXECUTOR:
217                try:
218                    ex = get_executor(co, i)
219                except (ValueError, RuntimeError):
220                    ex = None
221
222                if ex:
223                    op, arg = ex.get_opcode(), ex.get_oparg()
224                    found = True
225
226            res.append(op.to_bytes())
227            res.append(arg.to_bytes())
228        return code if not found else b''.join(res)
229    else:
230        return co.co_code
231
def code_info(x):
    """Formatted details of methods, functions, or code."""
    code = _get_code_object(x)
    return _format_code_info(code)
235
def _format_code_info(co):
    """Return a multi-line, human readable summary of code object *co*.

    Fixed header lines come first, followed by one indexed listing for each
    non-empty table (constants, names, variable names, free and cell
    variables).
    """
    lines = [
        "Name:              %s" % co.co_name,
        "Filename:          %s" % co.co_filename,
        "Argument count:    %s" % co.co_argcount,
        "Positional-only arguments: %s" % co.co_posonlyargcount,
        "Kw-only arguments: %s" % co.co_kwonlyargcount,
        "Number of locals:  %s" % co.co_nlocals,
        "Stack size:        %s" % co.co_stacksize,
        "Flags:             %s" % pretty_flags(co.co_flags),
    ]
    # (heading, entries, per-entry format); constants are shown with repr.
    tables = (
        ("Constants:", co.co_consts, "%4d: %r"),
        ("Names:", co.co_names, "%4d: %s"),
        ("Variable names:", co.co_varnames, "%4d: %s"),
        ("Free variables:", co.co_freevars, "%4d: %s"),
        ("Cell variables:", co.co_cellvars, "%4d: %s"),
    )
    for heading, entries, entry_format in tables:
        if not entries:
            continue
        lines.append(heading)
        for numbered_entry in enumerate(entries):
            lines.append(entry_format % numbered_entry)
    return "\n".join(lines)
267
def show_code(co, *, file=None):
    """Print details of methods, functions, or code to *file*.

    If *file* is not provided, the output is printed on stdout.
    """
    details = code_info(co)
    print(details, file=file)
274
# Source span attached to an instruction.  All four fields default to None,
# so a Positions can be built from an empty tuple.
Positions = collections.namedtuple(
    'Positions',
    [
        'lineno',
        'end_lineno',
        'col_offset',
        'end_col_offset',
    ],
    defaults=[None] * 4
)
285
# Plain namedtuple holding the data of one instruction; the Instruction
# subclass adds computed properties on top of it.
_Instruction = collections.namedtuple(
    "_Instruction",
    [
        'opname',
        'opcode',
        'arg',
        'argval',
        'argrepr',
        'offset',
        'start_offset',
        'starts_line',
        'line_number',
        'label',
        'positions',
        'cache_info',
    ],
    # Defaults apply to the last three fields: label, positions, cache_info.
    defaults=[None, None, None]
)

# Per-field documentation, attached to the generated field descriptors.
_Instruction.opname.__doc__ = "Human readable name for operation"
_Instruction.opcode.__doc__ = "Numeric code for operation"
_Instruction.arg.__doc__ = "Numeric argument to operation (if any), otherwise None"
_Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
_Instruction.argrepr.__doc__ = "Human readable description of operation argument"
_Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
_Instruction.start_offset.__doc__ = (
    "Start index of operation within bytecode sequence, including extended args if present; "
    "otherwise equal to Instruction.offset"
)
_Instruction.starts_line.__doc__ = "True if this opcode starts a source line, otherwise False"
_Instruction.line_number.__doc__ = "source line number associated with this opcode (if any), otherwise None"
_Instruction.label.__doc__ = "A label (int > 0) if this instruction is a jump target, otherwise None"
_Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction"
_Instruction.cache_info.__doc__ = "list of (name, size, data), one for each cache entry of the instruction"
320
# One row of a code object's exception table, as produced by
# _parse_exception_table: offsets start/end/target, a stack depth and a
# boolean lasti flag.
_ExceptionTableEntryBase = collections.namedtuple("_ExceptionTableEntryBase",
    "start end target depth lasti")

# Subclass used for the actual entries.  _make_labels_map later sets
# start_label, end_label and target_label attributes on these instances,
# which Formatter.print_exception_table reads.
class _ExceptionTableEntry(_ExceptionTableEntryBase):
    pass
326
# Column widths used by Formatter.print_instruction_line for the opcode
# name and the numeric oparg.
_OPNAME_WIDTH = 20
_OPARG_WIDTH = 5
329
def _get_cache_size(opname):
    """Return the inline cache entry count recorded for *opname* (0 if none)."""
    entries = _inline_cache_entries.get(opname, 0)
    return entries
332
def _get_jump_target(op, arg, offset):
    """Return the bytecode offset a jump instruction transfers control to.

    Returns None when *op* is not a jump instruction.
    """
    deop = _deoptop(op)
    caches = _get_cache_size(_all_opname[deop])
    if deop in hasjrel:
        # Relative jumps are measured in 2-byte code units from the end of
        # the instruction, i.e. past its own code unit and its cache entries.
        distance = -arg if _is_backward_jump(deop) else arg
        return offset + 2 + distance*2 + 2 * caches
    if deop in hasjabs:
        return arg*2
    return None
350
class Instruction(_Instruction):
    """Details for a bytecode operation.

       Defined fields:
         opname - human readable name for operation
         opcode - numeric code for operation
         arg - numeric argument to operation (if any), otherwise None
         argval - resolved arg value (if known), otherwise same as arg
         argrepr - human readable description of operation argument
         offset - start index of operation within bytecode sequence
         start_offset - start index of operation within bytecode sequence including extended args if present;
                        otherwise equal to Instruction.offset
         starts_line - True if this opcode starts a source line, otherwise False
         line_number - source line number associated with this opcode (if any), otherwise None
         label - A label if this instruction is a jump target, otherwise None
         positions - Optional dis.Positions object holding the span of source code
                     covered by this instruction
         cache_info - information about the format and content of the instruction's cache
                        entries (if any)
    """

    @property
    def oparg(self):
        """Alias for Instruction.arg."""
        return self.arg

    @property
    def baseopcode(self):
        """Numeric code for the base operation if operation is specialized.

        Otherwise equal to Instruction.opcode.
        """
        return _deoptop(self.opcode)

    @property
    def baseopname(self):
        """Human readable name for the base operation if operation is specialized.

        Otherwise equal to Instruction.opname.
        """
        return opname[self.baseopcode]

    @property
    def cache_offset(self):
        """Start index of the cache entries following the operation."""
        return self.offset + 2

    @property
    def end_offset(self):
        """End index of the cache entries following the operation."""
        # Look the cache size up by base opcode, the same way
        # _get_jump_target and _get_instructions_bytes do, so a specialized
        # instruction reports the same extent as its base instruction.
        return self.cache_offset + _get_cache_size(_all_opname[self.baseopcode])*2

    @property
    def jump_target(self):
        """Bytecode index of the jump target if this is a jump operation.

        Otherwise return None.
        """
        return _get_jump_target(self.opcode, self.arg, self.offset)

    @property
    def is_jump_target(self):
        """True if other code jumps to here, otherwise False"""
        return self.label is not None

    def __str__(self):
        """Return the single disassembly line a default Formatter prints."""
        output = io.StringIO()
        formatter = Formatter(file=output)
        formatter.print_instruction(self, False)
        return output.getvalue()
421
422
class Formatter:
    """Writes disassembly lines and exception tables to a file object."""

    def __init__(self, file=None, lineno_width=0, offset_width=0, label_width=0,
                       line_offset=0, show_caches=False):
        """Create a Formatter

        *file* where to write the output
        *lineno_width* sets the width of the line number field (0 omits it)
        *offset_width* sets the width of the instruction offset field
        *label_width* sets the width of the label field
        *line_offset* is accepted but not used by this class
        *show_caches* is a boolean indicating whether to display cache lines

        """
        self.file = file
        self.lineno_width = lineno_width
        self.offset_width = offset_width
        self.label_width = label_width
        self.show_caches = show_caches

    def print_instruction(self, instr, mark_as_current=False):
        """Print *instr*, followed by its CACHE lines when show_caches is set.

        *mark_as_current* puts the '-->' marker on the instruction line.
        """
        self.print_instruction_line(instr, mark_as_current)
        if self.show_caches and instr.cache_info:
            offset = instr.offset
            for name, size, data in instr.cache_info:
                for i in range(size):
                    offset += 2
                    # Only show the fancy argrepr for a CACHE instruction when it's
                    # the first entry for a particular cache value:
                    if i == 0:
                        argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
                    else:
                        argrepr = ""
                    self.print_instruction_line(
                        Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset,
                                    False, None, None, instr.positions),
                        False)

    def print_instruction_line(self, instr, mark_as_current):
        """Format instruction details for inclusion in disassembly output."""
        lineno_width = self.lineno_width
        offset_width = self.offset_width
        label_width = self.label_width

        # Separate source lines with a blank line, except before the very
        # first instruction.
        new_source_line = (lineno_width > 0 and
                           instr.starts_line and
                           instr.offset > 0)
        if new_source_line:
            print(file=self.file)

        fields = []
        # Column: Source code line number
        if lineno_width:
            if instr.starts_line:
                # Numbers use %d; the "no line" placeholder is a string.
                lineno_fmt = "%%%dd" if instr.line_number is not None else "%%%ds"
                lineno_fmt = lineno_fmt % lineno_width
                lineno = _NO_LINENO if instr.line_number is None else instr.line_number
                fields.append(lineno_fmt % lineno)
            else:
                fields.append(' ' * lineno_width)
        # Column: Label
        if instr.label is not None:
            lbl = f"L{instr.label}:"
            fields.append(f"{lbl:>{label_width}}")
        else:
            fields.append(' ' * label_width)
        # Column: Instruction offset from start of code sequence
        if offset_width > 0:
            fields.append(f"{repr(instr.offset):>{offset_width}}  ")
        # Column: Current instruction indicator
        if mark_as_current:
            fields.append('-->')
        else:
            fields.append('   ')
        # Column: Opcode name
        fields.append(instr.opname.ljust(_OPNAME_WIDTH))
        # Column: Opcode argument
        if instr.arg is not None:
            arg = repr(instr.arg)
            # If opname is longer than _OPNAME_WIDTH, we allow it to overflow into
            # the space reserved for oparg. This results in fewer misaligned opargs
            # in the disassembly output.
            opname_excess = max(0, len(instr.opname) - _OPNAME_WIDTH)
            fields.append(arg.rjust(_OPARG_WIDTH - opname_excess))
            # Column: Opcode argument details
            if instr.argrepr:
                fields.append('(' + instr.argrepr + ')')
        print(' '.join(fields).rstrip(), file=self.file)

    def print_exception_table(self, exception_entries):
        """Print the exception table, one labelled range per entry.

        Nothing is printed when *exception_entries* is empty.  Each entry
        must provide start_label, end_label, target_label, depth and lasti.
        """
        file = self.file
        if exception_entries:
            print("ExceptionTable:", file=file)
            for entry in exception_entries:
                lasti = " lasti" if entry.lasti else ""
                start = entry.start_label
                end = entry.end_label
                target = entry.target_label
                print(f"  L{start} to L{end} -> L{target} [{entry.depth}]{lasti}", file=file)
521
522
class ArgResolver:
    """Turns raw opargs into resolved values and readable descriptions."""

    def __init__(self, co_consts=None, names=None, varname_from_oparg=None, labels_map=None):
        """Store the lookup tables used for resolution.

        Every table is optional; *labels_map* (offset -> label number)
        falls back to an empty dict.
        """
        self.co_consts = co_consts
        self.names = names
        self.varname_from_oparg = varname_from_oparg
        self.labels_map = labels_map or {}

    def offset_from_jump_arg(self, op, arg, offset):
        """Return the target offset of a jump, or None if *op* is no jump."""
        deop = _deoptop(op)
        if deop in hasjabs:
            return arg * 2
        if deop not in hasjrel:
            return None
        # Relative jumps count 2-byte units from the end of the instruction,
        # i.e. past its own code unit and its cache entries.
        signed_arg = -arg if _is_backward_jump(deop) else arg
        caches = _get_cache_size(_all_opname[deop])
        return offset + 2 + signed_arg*2 + 2 * caches

    def get_label_for_offset(self, offset):
        """Return the label number assigned to *offset*, or None."""
        return self.labels_map.get(offset, None)

    def get_argval_argrepr(self, op, arg, offset):
        """Return ``(argval, argrepr)`` for the instruction at *offset*.

        With no oparg the result is ``(None, '')``.  Otherwise argval is the
        dereferenced value when it can be determined (falling back to *arg*
        itself) and argrepr is its description, possibly empty.
        """
        get_name = None if self.names is None else self.names.__getitem__
        deop = _deoptop(op)
        if arg is None:
            return None, ''

        #  _disassemble_bytes needs the string repr of the raw name index
        #  for LOAD_GLOBAL, LOAD_CONST, etc., so argval starts out as arg.
        argval, argrepr = arg, ''
        if deop in hasconst:
            argval, argrepr = _get_const_info(deop, arg, self.co_consts)
        elif deop in hasname:
            # Some opcodes pack flag bits below the name index: (divisor
            # that recovers the index, suffix shown when the low bit is set).
            if deop == LOAD_GLOBAL:
                packed = (2, " + NULL")
            elif deop == LOAD_ATTR:
                packed = (2, " + NULL|self")
            elif deop == LOAD_SUPER_ATTR:
                packed = (4, " + NULL|self")
            else:
                packed = None
            if packed is None:
                argval, argrepr = _get_name_info(arg, get_name)
            else:
                divisor, suffix = packed
                argval, argrepr = _get_name_info(arg//divisor, get_name)
                if (arg & 1) and argrepr:
                    argrepr = f"{argrepr}{suffix}"
        elif deop in hasjump or deop in hasexc:
            argval = self.offset_from_jump_arg(op, arg, offset)
            lbl = self.get_label_for_offset(argval)
            assert lbl is not None
            argrepr = f"to L{lbl}"
        elif deop in (LOAD_FAST_LOAD_FAST, STORE_FAST_LOAD_FAST, STORE_FAST_STORE_FAST):
            # Two local indices packed into one oparg: high and low nibble.
            first, first_repr = _get_name_info(arg >> 4, self.varname_from_oparg)
            second, second_repr = _get_name_info(arg & 15, self.varname_from_oparg)
            argrepr = first_repr + ", " + second_repr
            argval = first, second
        elif deop in haslocal or deop in hasfree:
            argval, argrepr = _get_name_info(arg, self.varname_from_oparg)
        elif deop in hascompare:
            argval = cmp_op[arg >> 5]
            argrepr = f"bool({argval})" if arg & 16 else argval
        elif deop == CONVERT_VALUE:
            argval = (None, str, repr, ascii)[arg]
            argrepr = ('', 'str', 'repr', 'ascii')[arg]
        elif deop == SET_FUNCTION_ATTRIBUTE:
            selected = [attr for bit, attr in enumerate(FUNCTION_ATTR_FLAGS)
                        if arg & (1<<bit)]
            argrepr = ', '.join(selected)
        elif deop == BINARY_OP:
            _, argrepr = _nb_ops[arg]
        elif deop == CALL_INTRINSIC_1:
            argrepr = _intrinsic_1_descs[arg]
        elif deop == CALL_INTRINSIC_2:
            argrepr = _intrinsic_2_descs[arg]
        return argval, argrepr
605
def get_instructions(x, *, first_line=None, show_caches=None, adaptive=False):
    """Iterator for the opcodes in methods, functions or code

    Generates a series of Instruction named tuples giving the details of
    each operations in the supplied code.

    If *first_line* is not None, it indicates the line number that should
    be reported for the first source line in the disassembled code.
    Otherwise, the source line information (if any) is taken directly from
    the disassembled code object.

    *show_caches* is accepted but not used by this function.
    """
    co = _get_code_object(x)
    linestarts = dict(findlinestarts(co))
    # Shift reported line numbers so the first source line is *first_line*.
    line_offset = 0 if first_line is None else first_line - co.co_firstlineno

    original_code = co.co_code
    labels_map = _make_labels_map(original_code)
    arg_resolver = ArgResolver(co_consts=co.co_consts,
                               names=co.co_names,
                               varname_from_oparg=co._varname_from_oparg,
                               labels_map=labels_map)
    code = _get_code_array(co, adaptive)
    return _get_instructions_bytes(code,
                                   linestarts=linestarts,
                                   line_offset=line_offset,
                                   co_positions=co.co_positions(),
                                   original_code=original_code,
                                   arg_resolver=arg_resolver)
635
def _get_const_value(op, arg, co_consts):
    """Helper to get the value of the const in a hasconst op.

       Returns ``co_consts[arg]`` when a constants table is available,
       and the dis.UNKNOWN sentinel when *co_consts* is None.
    """
    assert op in hasconst

    if co_consts is None:
        return UNKNOWN
    return co_consts[arg]
649
def _get_const_info(op, arg, co_consts):
    """Helper to get optional details about const references

       Returns ``(value, repr(value))`` for the referenced constant when
       it can be looked up, and ``(dis.UNKNOWN, '')`` otherwise.
    """
    argval = _get_const_value(op, arg, co_consts)
    if argval is UNKNOWN:
        return argval, ''
    return argval, repr(argval)
661
662def _get_name_info(name_index, get_name, **extrainfo):
663    """Helper to get optional details about named references
664
665       Returns the dereferenced name as both value and repr if the name
666       list is defined.
667       Otherwise returns the sentinel value dis.UNKNOWN for the value
668       and an empty string for its repr.
669    """
670    if get_name is not None:
671        argval = get_name(name_index, **extrainfo)
672        return argval, argval
673    else:
674        return UNKNOWN, ''
675
676def _parse_varint(iterator):
677    b = next(iterator)
678    val = b & 63
679    while b&64:
680        val <<= 6
681        b = next(iterator)
682        val |= b&63
683    return val
684
def _parse_exception_table(code):
    """Decode ``code.co_exceptiontable`` into a list of _ExceptionTableEntry.

    Each entry is four varints: start, length and target (in 2-byte units,
    converted here to byte offsets) and a combined value whose low bit is
    the lasti flag and whose remaining bits are the depth.
    """
    stream = iter(code.co_exceptiontable)
    entries = []
    try:
        while True:
            start = _parse_varint(stream)*2
            end = start + _parse_varint(stream)*2
            target = _parse_varint(stream)*2
            depth_lasti = _parse_varint(stream)
            entries.append(_ExceptionTableEntry(
                start, end, target, depth_lasti >> 1, bool(depth_lasti & 1)))
    except StopIteration:
        # Running out of bytes is the normal end of the table.
        return entries
700
701def _is_backward_jump(op):
702    return opname[op] in ('JUMP_BACKWARD',
703                          'JUMP_BACKWARD_NO_INTERRUPT')
704
def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None,
                            original_code=None, arg_resolver=None):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.

    *original_code* (defaulting to *code*) is the bytecode that is walked;
    the opcode reported for each instruction and the raw cache bytes are
    read from *code* at the same offsets.  *linestarts* maps offsets to line
    numbers, which are shifted by *line_offset*.  Without an *arg_resolver*
    argval is the raw oparg, argrepr its repr, and no labels are assigned.
    """
    # Use the basic, unadaptive code for finding labels and actually walking the
    # bytecode, since replacements like ENTER_EXECUTOR and INSTRUMENTED_* can
    # mess that logic up pretty badly:
    original_code = original_code or code
    co_positions = co_positions or iter(())

    starts_line = False
    local_line_number = None
    line_number = None
    for offset, start_offset, op, arg in _unpack_opargs(original_code):
        if linestarts is not None:
            starts_line = offset in linestarts
            if starts_line:
                local_line_number = linestarts[offset]
            if local_line_number is not None:
                line_number = local_line_number + line_offset
            else:
                line_number = None
        positions = Positions(*next(co_positions, ()))
        # Cache layout is determined from the opcode in the walked code; the
        # opcode that is reported comes from *code*.
        deop = _deoptop(op)
        op = code[offset]

        if arg_resolver:
            argval, argrepr = arg_resolver.get_argval_argrepr(op, arg, offset)
        else:
            argval, argrepr = arg, repr(arg)

        caches = _get_cache_size(_all_opname[deop])
        # Advance the co_positions iterator:
        for _ in range(caches):
            next(co_positions, ())

        if caches:
            cache_info = []
            # Cache entries are laid out back to back after the instruction;
            # step past each entry so every one gets its own bytes.
            cache_offset = offset + 2
            for name, size in _cache_format[opname[deop]].items():
                data = code[cache_offset: cache_offset + 2 * size]
                cache_offset += 2 * size
                cache_info.append((name, size, data))
        else:
            cache_info = None

        label = arg_resolver.get_label_for_offset(offset) if arg_resolver else None
        yield Instruction(_all_opname[op], op, arg, argval, argrepr,
                          offset, start_offset, starts_line, line_number,
                          label, positions, cache_info)
757
758
def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False,
                show_offsets=False):
    """Disassemble a code object.

    *lasti* is passed on as the offset of the current instruction.
    """
    linestarts = dict(findlinestarts(co))
    exception_entries = _parse_exception_table(co)
    labels_map = _make_labels_map(co.co_code, exception_entries=exception_entries)

    # The offset column is at least four characters wide when it is shown.
    if show_offsets:
        offset_width = len(str(max(len(co.co_code) - 2, 9999)))
    else:
        offset_width = 0
    formatter = Formatter(file=file,
                          lineno_width=_get_lineno_width(linestarts),
                          offset_width=offset_width,
                          label_width=4 + len(str(len(labels_map))),
                          show_caches=show_caches)
    arg_resolver = ArgResolver(co_consts=co.co_consts,
                               names=co.co_names,
                               varname_from_oparg=co._varname_from_oparg,
                               labels_map=labels_map)
    code = _get_code_array(co, adaptive)
    _disassemble_bytes(code, lasti, linestarts,
                       exception_entries=exception_entries,
                       co_positions=co.co_positions(),
                       original_code=co.co_code,
                       arg_resolver=arg_resolver,
                       formatter=formatter)
778
def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adaptive=False, show_offsets=False):
    """Disassemble *co* and then the code objects among its constants.

    *depth* limits the recursion: None means unlimited, and a value that is
    not greater than 0 disassembles *co* only.
    """
    disassemble(co, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets)
    if depth is not None:
        if not depth > 0:
            return
        depth = depth - 1
    for const in co.co_consts:
        if not hasattr(const, 'co_code'):
            continue
        print(file=file)
        print("Disassembly of %r:" % (const,), file=file)
        _disassemble_recursive(
            const, file=file, depth=depth, show_caches=show_caches,
            adaptive=adaptive, show_offsets=show_offsets
        )
792
793
def _make_labels_map(original_code, exception_entries=()):
    """Number every labelled offset of *original_code*.

    Labelled offsets are the jump targets reported by findlabels() plus the
    start, end and target offsets of each exception table entry.

    Returns a dict mapping offset -> label number, where labels are numbered
    from 1 in increasing offset order.

    Side effect: each entry in *exception_entries* gets ``start_label``,
    ``end_label`` and ``target_label`` attributes set to the label numbers
    of its offsets.
    """
    offsets = set(findlabels(original_code))
    for start, end, target, _, _ in exception_entries:
        offsets.update((start, end, target))

    # Sort once; label numbers are 1-based positions in offset order.
    labels_map = {offset: number
                  for number, offset in enumerate(sorted(offsets), start=1)}

    for entry in exception_entries:
        entry.start_label = labels_map[entry.start]
        entry.end_label = labels_map[entry.end]
        entry.target_label = labels_map[entry.target]
    return labels_map
808
809_NO_LINENO = '  --'
810
811def _get_lineno_width(linestarts):
812    if linestarts is None:
813        return 0
814    maxlineno = max(filter(None, linestarts.values()), default=-1)
815    if maxlineno == -1:
816        # Omit the line number column entirely if we have no line number info
817        return 0
818    lineno_width = max(3, len(str(maxlineno)))
819    if lineno_width < len(_NO_LINENO) and None in linestarts.values():
820        lineno_width = len(_NO_LINENO)
821    return lineno_width
822
823
def _disassemble_bytes(code, lasti=-1, linestarts=None,
                       *, line_offset=0, exception_entries=(),
                       co_positions=None, original_code=None,
                       arg_resolver=None, formatter=None):
    """Decode *code* into instructions and print them with *formatter*.

    Both *formatter* and *arg_resolver* are required despite their None
    defaults.  The remaining arguments are forwarded unchanged to
    _get_instructions_bytes() and print_instructions().
    """
    assert formatter is not None
    assert arg_resolver is not None

    print_instructions(
        _get_instructions_bytes(
            code,
            linestarts=linestarts,
            line_offset=line_offset,
            co_positions=co_positions,
            original_code=original_code,
            arg_resolver=arg_resolver,
        ),
        exception_entries,
        formatter,
        lasti=lasti,
    )
839
840
def print_instructions(instrs, exception_entries, formatter, lasti=-1):
    """Print each instruction, then the exception table, via *formatter*.

    An instruction is flagged as current when *lasti* lies between its
    offset and the end of its inline cache entries (inclusive).
    """
    for instr in instrs:
        first = instr.offset
        if first <= lasti:
            # Each CACHE takes 2 bytes
            cache_bytes = 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)])
            is_current = lasti <= first + cache_bytes
        else:
            is_current = False
        formatter.print_instruction(instr, is_current)

    formatter.print_exception_table(exception_entries)
849
def _disassemble_str(source, **kwargs):
    """Compile *source* and recursively disassemble the resulting code.

    Keyword arguments are passed through to _disassemble_recursive().
    """
    compiled = _try_compile(source, '<dis>')
    _disassemble_recursive(compiled, **kwargs)
853
disco = disassemble                     # Alias of disassemble(), kept for backwards compatibility
855
856
# The oparg is assumed to be held in a C `int` that is 32 bits wide.
_INT_BITS = 32
# Smallest value that no longer fits in a signed _INT_BITS-bit int (2**31);
# _unpack_opargs() wraps accumulated extended args at or above it to negative.
_INT_OVERFLOW = 2 ** (_INT_BITS - 1)
861
def _unpack_opargs(code):
    """Walk *code* and yield one tuple per instruction.

    Each tuple is (offset, start_offset, op, arg):
      offset       -- position of the instruction itself.
      start_offset -- position of the first EXTENDED_ARG prefixing it; equal
                      to offset for EXTENDED_ARG instructions themselves and
                      for instructions without a prefix.
      op           -- the opcode as stored in *code*.
      arg          -- the oparg with pending EXTENDED_ARG bits folded in, or
                      None for opcodes that take no argument.

    Inline CACHE entries following an instruction are skipped.
    """
    pending_bits = 0   # high bits accumulated from EXTENDED_ARG prefixes
    prefix_count = 0   # EXTENDED_ARG instructions preceding the current one
    to_skip = 0        # inline CACHE entries still to be skipped
    for pos in range(0, len(code), 2):
        if to_skip:
            to_skip -= 1
            continue

        op = code[pos]
        base_op = _deoptop(op)
        to_skip = _get_cache_size(_all_opname[base_op])
        is_prefix = base_op == EXTENDED_ARG

        if base_op in hasarg:
            arg = code[pos + 1] | pending_bits
            if is_prefix:
                pending_bits = arg << 8
                # The oparg is stored as a signed integer; once it exceeds
                # the upper limit it wraps around to a negative value.
                if pending_bits >= _INT_OVERFLOW:
                    pending_bits -= 2 * _INT_OVERFLOW
            else:
                pending_bits = 0
        else:
            arg = None
            pending_bits = 0

        if is_prefix:
            prefix_count += 1
            yield (pos, pos, op, arg)
        else:
            yield (pos, pos - 2 * prefix_count, op, arg)
            prefix_count = 0
892
def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.

    Return the list of offsets, without duplicates, in the order in which
    each target is first encountered.

    """
    # A dict is used as an insertion-ordered set so that de-duplication is
    # O(1) per target instead of a linear scan of the result list.
    targets = {}
    for offset, _, op, arg in _unpack_opargs(code):
        if arg is None:
            continue
        label = _get_jump_target(op, arg, offset)
        if label is not None:
            targets[label] = None
    return list(targets)
908
def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.

    Generate pairs (offset, lineno).
    lineno will be an integer, or None if the offset does not have a
    source line.
    """
    # A unique sentinel is used because None is a valid line number.  Line
    # numbers are compared by value, not identity, so equal integers held in
    # distinct objects are not mistaken for a change of line.
    lastline = object()
    for start, _end, line in code.co_lines():
        if line != lastline:
            lastline = line
            yield start, line
922
def _find_imports(co):
    """Find import statements in the code

    Generate triplets (name, level, fromlist) where
    name is the imported module and level, fromlist are
    the corresponding args to __import__.

    Only IMPORT_NAME instructions whose two preceding instructions
    (EXTENDED_ARG excluded) both load constants are reported.
    """
    import_name = opmap['IMPORT_NAME']
    consts, names = co.co_consts, co.co_names

    real_ops = [(op, arg)
                for _, _, op, arg in _unpack_opargs(co.co_code)
                if op != EXTENDED_ARG]

    # An IMPORT_NAME needs two predecessors, so start at index 2.
    for index in range(2, len(real_ops)):
        op, oparg = real_ops[index]
        if op != import_name:
            continue
        level_opcode, level_arg = real_ops[index - 2]
        from_opcode, from_arg = real_ops[index - 1]
        if from_opcode not in hasconst or level_opcode not in hasconst:
            continue
        level = _get_const_value(level_opcode, level_arg, consts)
        fromlist = _get_const_value(from_opcode, from_arg, consts)
        yield (names[oparg], level, fromlist)
944
def _find_store_names(co):
    """Find names of variables which are written in the code

    Generate the name operand of every STORE_NAME and STORE_GLOBAL
    instruction, in instruction order.
    """
    store_opcodes = frozenset((opmap['STORE_NAME'], opmap['STORE_GLOBAL']))
    name_table = co.co_names

    for *_, op, arg in _unpack_opargs(co.co_code):
        if op in store_opcodes:
            yield name_table[arg]
959
960
class Bytecode:
    """The bytecode operations of a piece of code

    Instantiate this with a function, method, other compiled object, string of
    code, or a code object (as returned by compile()).

    Iterating over this yields the bytecode operations as Instruction instances.
    """
    def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False, adaptive=False, show_offsets=False):
        """Wrap the code object obtained from *x*.

        first_line     -- line number to report for the first source line;
                          defaults to the code object's co_firstlineno.
        current_offset -- instruction offset that dis() flags as current.
        show_caches    -- forwarded to the Formatter used by dis().
        adaptive       -- forwarded to _get_code_array().
        show_offsets   -- when true, dis() reserves an offset column.
        """
        self.codeobj = co = _get_code_object(x)
        if first_line is None:
            self.first_line = co.co_firstlineno
            self._line_offset = 0
        else:
            self.first_line = first_line
            # Shift applied to reported line numbers so they start at first_line.
            self._line_offset = first_line - co.co_firstlineno
        self._linestarts = dict(findlinestarts(co))
        self._original_object = x
        self.current_offset = current_offset
        self.exception_entries = _parse_exception_table(co)
        self.show_caches = show_caches
        self.adaptive = adaptive
        self.show_offsets = show_offsets

    def __iter__(self):
        """Return an iterator over the Instruction objects of the code."""
        co = self.codeobj
        original_code = co.co_code
        labels_map = _make_labels_map(original_code, self.exception_entries)
        arg_resolver = ArgResolver(co_consts=co.co_consts,
                                   names=co.co_names,
                                   varname_from_oparg=co._varname_from_oparg,
                                   labels_map=labels_map)
        return _get_instructions_bytes(_get_code_array(co, self.adaptive),
                                       linestarts=self._linestarts,
                                       line_offset=self._line_offset,
                                       co_positions=co.co_positions(),
                                       original_code=original_code,
                                       arg_resolver=arg_resolver)

    def __repr__(self):
        return f"{self.__class__.__name__}({self._original_object!r})"

    @classmethod
    def from_traceback(cls, tb, *, show_caches=False, adaptive=False, show_offsets=False):
        """Construct a Bytecode from the given traceback.

        The innermost traceback entry is used: its frame's code object is
        wrapped and its tb_lasti becomes the current offset.  The keyword
        arguments are forwarded to the constructor.
        """
        while tb.tb_next:
            tb = tb.tb_next
        return cls(
            tb.tb_frame.f_code,
            current_offset=tb.tb_lasti,
            show_caches=show_caches,
            adaptive=adaptive,
            show_offsets=show_offsets,
        )

    def info(self):
        """Return formatted information about the code object."""
        return _format_code_info(self.codeobj)

    def dis(self):
        """Return a formatted view of the bytecode operations."""
        co = self.codeobj
        # -1 means "no current instruction".
        offset = self.current_offset if self.current_offset is not None else -1
        with io.StringIO() as output:
            code = _get_code_array(co, self.adaptive)
            # Wide enough for the last offset, never below 4 digits.
            offset_width = len(str(max(len(code) - 2, 9999))) if self.show_offsets else 0

            labels_map = _make_labels_map(co.co_code, self.exception_entries)
            label_width = 4 + len(str(len(labels_map)))
            formatter = Formatter(file=output,
                                  lineno_width=_get_lineno_width(self._linestarts),
                                  offset_width=offset_width,
                                  label_width=label_width,
                                  line_offset=self._line_offset,
                                  show_caches=self.show_caches)

            arg_resolver = ArgResolver(co_consts=co.co_consts,
                                       names=co.co_names,
                                       varname_from_oparg=co._varname_from_oparg,
                                       labels_map=labels_map)
            _disassemble_bytes(code,
                               linestarts=self._linestarts,
                               line_offset=self._line_offset,
                               lasti=offset,
                               exception_entries=self.exception_entries,
                               co_positions=co.co_positions(),
                               original_code=co.co_code,
                               arg_resolver=arg_resolver,
                               formatter=formatter)
            return output.getvalue()
1052
1053
def main(args=None):
    """Command-line interface: compile a source file and disassemble it.

    *args* is the list of command-line arguments to parse; when it is None
    (the default) argparse reads them from sys.argv.  The source is read
    from the file named by the positional argument, or from standard input
    when that argument is '-' or omitted.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-C', '--show-caches', action='store_true',
                        help='show inline caches')
    parser.add_argument('-O', '--show-offsets', action='store_true',
                        help='show instruction offsets')
    parser.add_argument('infile', nargs='?', default='-')
    args = parser.parse_args(args=args)
    if args.infile == '-':
        name = '<stdin>'
        source = sys.stdin.buffer.read()
    else:
        name = args.infile
        # Read bytes so that compile() handles the source encoding itself.
        with open(args.infile, 'rb') as infile:
            source = infile.read()
    code = compile(source, name, "exec")
    dis(code, show_caches=args.show_caches, show_offsets=args.show_offsets)
1073
# Run the command-line interface when this module is executed as a script.
if __name__ == "__main__":
    main()
1076