"""Disassembler of Python byte code into mnemonics."""

import sys
import types
import collections
import io

from opcode import *
from opcode import (
    __all__ as _opcodes_all,
    _cache_format,
    _inline_cache_entries,
    _nb_ops,
    _intrinsic_1_descs,
    _intrinsic_2_descs,
    _specializations,
    _specialized_opmap,
)

from _opcode import get_executor

__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
           "findlinestarts", "findlabels", "show_code",
           "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
del _opcodes_all

# Types whose instances dis() descends into when walking a class or module
# __dict__.
_have_code = (types.MethodType, types.FunctionType, types.CodeType,
              classmethod, staticmethod, type)

CONVERT_VALUE = opmap['CONVERT_VALUE']

SET_FUNCTION_ATTRIBUTE = opmap['SET_FUNCTION_ATTRIBUTE']
FUNCTION_ATTR_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure')

ENTER_EXECUTOR = opmap['ENTER_EXECUTOR']
LOAD_CONST = opmap['LOAD_CONST']
RETURN_CONST = opmap['RETURN_CONST']
LOAD_GLOBAL = opmap['LOAD_GLOBAL']
BINARY_OP = opmap['BINARY_OP']
JUMP_BACKWARD = opmap['JUMP_BACKWARD']
FOR_ITER = opmap['FOR_ITER']
SEND = opmap['SEND']
LOAD_ATTR = opmap['LOAD_ATTR']
LOAD_SUPER_ATTR = opmap['LOAD_SUPER_ATTR']
CALL_INTRINSIC_1 = opmap['CALL_INTRINSIC_1']
CALL_INTRINSIC_2 = opmap['CALL_INTRINSIC_2']
LOAD_FAST_LOAD_FAST = opmap['LOAD_FAST_LOAD_FAST']
STORE_FAST_LOAD_FAST = opmap['STORE_FAST_LOAD_FAST']
STORE_FAST_STORE_FAST = opmap['STORE_FAST_STORE_FAST']

CACHE = opmap["CACHE"]

# Copies of opname/opmap extended with the specialized instructions, so that
# both base and specialized opcodes can be looked up by number or by name.
_all_opname = list(opname)
_all_opmap = dict(opmap)
for name, op in _specialized_opmap.items():
    # fill opname and opmap
    assert op < len(_all_opname)
    _all_opname[op] = name
    _all_opmap[name] = op

# Maps the name of each specialized instruction to the name of its base
# instruction.
deoptmap = {
    specialized: base for base, family in _specializations.items() for specialized in family
}

def _try_compile(source, name):
    """Attempts to compile the given source, first as an expression and
       then as a statement if the first approach fails.

       Utility function to accept strings in functions that otherwise
       expect code objects.  *name* is handed to compile() as the filename.
       A SyntaxError from the 'eval' attempt is deliberately swallowed; any
       error raised by the 'exec' fallback propagates to the caller.
    """
    try:
        return compile(source, name, 'eval')
    except SyntaxError:
        pass
    return compile(source, name, 'exec')

def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False,
        show_offsets=False):
    """Disassemble classes, methods, functions, and other compiled objects.

    With no argument, disassemble the last traceback.

    Compiled objects currently include generator objects, async generator
    objects, and coroutine objects, all of which store their code object
    in a special attribute.

    Besides those, *x* may be an object with a __dict__ (each member whose
    type is listed in _have_code is disassembled in sorted name order), a
    code object, a bytes/bytearray of raw bytecode, or a string of source
    code.  Anything else raises TypeError.

    *file* is forwarded to print().  *depth*, *show_caches*, *adaptive* and
    *show_offsets* are forwarded to the helpers that do the work; the raw
    bytecode branch uses only *file*, *show_caches* and *show_offsets*.
    """
    if x is None:
        distb(file=file, show_caches=show_caches, adaptive=adaptive,
              show_offsets=show_offsets)
        return
    # Extract functions from methods.
    if hasattr(x, '__func__'):
        x = x.__func__
    # Extract compiled code objects from...
    if hasattr(x, '__code__'):  # ...a function, or
        x = x.__code__
    elif hasattr(x, 'gi_code'):  #...a generator object, or
        x = x.gi_code
    elif hasattr(x, 'ag_code'):  #...an asynchronous generator object, or
        x = x.ag_code
    elif hasattr(x, 'cr_code'):  #...a coroutine.
        x = x.cr_code
    # Perform the disassembly.
    if hasattr(x, '__dict__'):  # Class or module
        items = sorted(x.__dict__.items())
        for name, x1 in items:
            if isinstance(x1, _have_code):
                print("Disassembly of %s:" % name, file=file)
                try:
                    dis(x1, file=file, depth=depth, show_caches=show_caches,
                        adaptive=adaptive, show_offsets=show_offsets)
                except TypeError as msg:
                    # Best effort: report the member that could not be
                    # disassembled and carry on with the remaining ones.
                    print("Sorry:", msg, file=file)
                print(file=file)
    elif hasattr(x, 'co_code'):  # Code object
        _disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches,
                               adaptive=adaptive, show_offsets=show_offsets)
    elif isinstance(x, (bytes, bytearray)):  # Raw bytecode
        labels_map = _make_labels_map(x)
        label_width = 4 + len(str(len(labels_map)))
        formatter = Formatter(file=file,
                              offset_width=len(str(max(len(x) - 2, 9999))) if show_offsets else 0,
                              label_width=label_width,
                              show_caches=show_caches)
        # No constants, names or variable names are available for raw bytes,
        # so the resolver is given only the jump labels.
        arg_resolver = ArgResolver(labels_map=labels_map)
        _disassemble_bytes(x, arg_resolver=arg_resolver, formatter=formatter)
    elif isinstance(x, str):  # Source code
        _disassemble_str(x, file=file, depth=depth, show_caches=show_caches,
                         adaptive=adaptive, show_offsets=show_offsets)
    else:
        raise TypeError("don't know how to disassemble %s objects" %
                        type(x).__name__)

def distb(tb=None, *, file=None, show_caches=False, adaptive=False, show_offsets=False):
    """Disassemble a traceback (default: last traceback).

    When *tb* is None the traceback is taken from sys.last_exc if that
    attribute exists, otherwise from sys.last_traceback.  The innermost
    frame of the traceback is disassembled with its tb_lasti passed as the
    current-instruction offset.

    Raises RuntimeError if *tb* is None and no last traceback is available.
    """
    if tb is None:
        try:
            if hasattr(sys, 'last_exc'):
                tb = sys.last_exc.__traceback__
            else:
                tb = sys.last_traceback
        except AttributeError:
            raise RuntimeError("no last traceback to disassemble") from None
        if tb is None:
            # The lookup succeeded but yielded no traceback (for example an
            # exception that was never raised); report it the same way as a
            # missing one instead of failing on tb.tb_next below.
            raise RuntimeError("no last traceback to disassemble")
    # Walk to the innermost traceback entry.
    while tb.tb_next:
        tb = tb.tb_next
    disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file, show_caches=show_caches,
                adaptive=adaptive, show_offsets=show_offsets)

# The inspect module interrogates this dictionary to build its
# list of CO_* constants.
It is also used by pretty_flags to 147# turn the co_flags field into a human readable list. 148COMPILER_FLAG_NAMES = { 149 1: "OPTIMIZED", 150 2: "NEWLOCALS", 151 4: "VARARGS", 152 8: "VARKEYWORDS", 153 16: "NESTED", 154 32: "GENERATOR", 155 64: "NOFREE", 156 128: "COROUTINE", 157 256: "ITERABLE_COROUTINE", 158 512: "ASYNC_GENERATOR", 159} 160 161def pretty_flags(flags): 162 """Return pretty representation of code flags.""" 163 names = [] 164 for i in range(32): 165 flag = 1<<i 166 if flags & flag: 167 names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag))) 168 flags ^= flag 169 if not flags: 170 break 171 else: 172 names.append(hex(flags)) 173 return ", ".join(names) 174 175class _Unknown: 176 def __repr__(self): 177 return "<unknown>" 178 179# Sentinel to represent values that cannot be calculated 180UNKNOWN = _Unknown() 181 182def _get_code_object(x): 183 """Helper to handle methods, compiled or raw code objects, and strings.""" 184 # Extract functions from methods. 185 if hasattr(x, '__func__'): 186 x = x.__func__ 187 # Extract compiled code objects from... 188 if hasattr(x, '__code__'): # ...a function, or 189 x = x.__code__ 190 elif hasattr(x, 'gi_code'): #...a generator object, or 191 x = x.gi_code 192 elif hasattr(x, 'ag_code'): #...an asynchronous generator object, or 193 x = x.ag_code 194 elif hasattr(x, 'cr_code'): #...a coroutine. 195 x = x.cr_code 196 # Handle source code. 197 if isinstance(x, str): 198 x = _try_compile(x, "<disassembly>") 199 # By now, if we don't have a code object, we can't disassemble x. 
200 if hasattr(x, 'co_code'): 201 return x 202 raise TypeError("don't know how to disassemble %s objects" % 203 type(x).__name__) 204 205def _deoptop(op): 206 name = _all_opname[op] 207 return _all_opmap[deoptmap[name]] if name in deoptmap else op 208 209def _get_code_array(co, adaptive): 210 if adaptive: 211 code = co._co_code_adaptive 212 res = [] 213 found = False 214 for i in range(0, len(code), 2): 215 op, arg = code[i], code[i+1] 216 if op == ENTER_EXECUTOR: 217 try: 218 ex = get_executor(co, i) 219 except (ValueError, RuntimeError): 220 ex = None 221 222 if ex: 223 op, arg = ex.get_opcode(), ex.get_oparg() 224 found = True 225 226 res.append(op.to_bytes()) 227 res.append(arg.to_bytes()) 228 return code if not found else b''.join(res) 229 else: 230 return co.co_code 231 232def code_info(x): 233 """Formatted details of methods, functions, or code.""" 234 return _format_code_info(_get_code_object(x)) 235 236def _format_code_info(co): 237 lines = [] 238 lines.append("Name: %s" % co.co_name) 239 lines.append("Filename: %s" % co.co_filename) 240 lines.append("Argument count: %s" % co.co_argcount) 241 lines.append("Positional-only arguments: %s" % co.co_posonlyargcount) 242 lines.append("Kw-only arguments: %s" % co.co_kwonlyargcount) 243 lines.append("Number of locals: %s" % co.co_nlocals) 244 lines.append("Stack size: %s" % co.co_stacksize) 245 lines.append("Flags: %s" % pretty_flags(co.co_flags)) 246 if co.co_consts: 247 lines.append("Constants:") 248 for i_c in enumerate(co.co_consts): 249 lines.append("%4d: %r" % i_c) 250 if co.co_names: 251 lines.append("Names:") 252 for i_n in enumerate(co.co_names): 253 lines.append("%4d: %s" % i_n) 254 if co.co_varnames: 255 lines.append("Variable names:") 256 for i_n in enumerate(co.co_varnames): 257 lines.append("%4d: %s" % i_n) 258 if co.co_freevars: 259 lines.append("Free variables:") 260 for i_n in enumerate(co.co_freevars): 261 lines.append("%4d: %s" % i_n) 262 if co.co_cellvars: 263 lines.append("Cell variables:") 
264 for i_n in enumerate(co.co_cellvars): 265 lines.append("%4d: %s" % i_n) 266 return "\n".join(lines) 267 268def show_code(co, *, file=None): 269 """Print details of methods, functions, or code to *file*. 270 271 If *file* is not provided, the output is printed on stdout. 272 """ 273 print(code_info(co), file=file) 274 275Positions = collections.namedtuple( 276 'Positions', 277 [ 278 'lineno', 279 'end_lineno', 280 'col_offset', 281 'end_col_offset', 282 ], 283 defaults=[None] * 4 284) 285 286_Instruction = collections.namedtuple( 287 "_Instruction", 288 [ 289 'opname', 290 'opcode', 291 'arg', 292 'argval', 293 'argrepr', 294 'offset', 295 'start_offset', 296 'starts_line', 297 'line_number', 298 'label', 299 'positions', 300 'cache_info', 301 ], 302 defaults=[None, None, None] 303) 304 305_Instruction.opname.__doc__ = "Human readable name for operation" 306_Instruction.opcode.__doc__ = "Numeric code for operation" 307_Instruction.arg.__doc__ = "Numeric argument to operation (if any), otherwise None" 308_Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg" 309_Instruction.argrepr.__doc__ = "Human readable description of operation argument" 310_Instruction.offset.__doc__ = "Start index of operation within bytecode sequence" 311_Instruction.start_offset.__doc__ = ( 312 "Start index of operation within bytecode sequence, including extended args if present; " 313 "otherwise equal to Instruction.offset" 314) 315_Instruction.starts_line.__doc__ = "True if this opcode starts a source line, otherwise False" 316_Instruction.line_number.__doc__ = "source line number associated with this opcode (if any), otherwise None" 317_Instruction.label.__doc__ = "A label (int > 0) if this instruction is a jump target, otherwise None" 318_Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction" 319_Instruction.cache_info.__doc__ = "list of (name, size, data), one for each cache entry of the 
instruction" 320 321_ExceptionTableEntryBase = collections.namedtuple("_ExceptionTableEntryBase", 322 "start end target depth lasti") 323 324class _ExceptionTableEntry(_ExceptionTableEntryBase): 325 pass 326 327_OPNAME_WIDTH = 20 328_OPARG_WIDTH = 5 329 330def _get_cache_size(opname): 331 return _inline_cache_entries.get(opname, 0) 332 333def _get_jump_target(op, arg, offset): 334 """Gets the bytecode offset of the jump target if this is a jump instruction. 335 336 Otherwise return None. 337 """ 338 deop = _deoptop(op) 339 caches = _get_cache_size(_all_opname[deop]) 340 if deop in hasjrel: 341 if _is_backward_jump(deop): 342 arg = -arg 343 target = offset + 2 + arg*2 344 target += 2 * caches 345 elif deop in hasjabs: 346 target = arg*2 347 else: 348 target = None 349 return target 350 351class Instruction(_Instruction): 352 """Details for a bytecode operation. 353 354 Defined fields: 355 opname - human readable name for operation 356 opcode - numeric code for operation 357 arg - numeric argument to operation (if any), otherwise None 358 argval - resolved arg value (if known), otherwise same as arg 359 argrepr - human readable description of operation argument 360 offset - start index of operation within bytecode sequence 361 start_offset - start index of operation within bytecode sequence including extended args if present; 362 otherwise equal to Instruction.offset 363 starts_line - True if this opcode starts a source line, otherwise False 364 line_number - source line number associated with this opcode (if any), otherwise None 365 label - A label if this instruction is a jump target, otherwise None 366 positions - Optional dis.Positions object holding the span of source code 367 covered by this instruction 368 cache_info - information about the format and content of the instruction's cache 369 entries (if any) 370 """ 371 372 @property 373 def oparg(self): 374 """Alias for Instruction.arg.""" 375 return self.arg 376 377 @property 378 def baseopcode(self): 379 
"""Numeric code for the base operation if operation is specialized. 380 381 Otherwise equal to Instruction.opcode. 382 """ 383 return _deoptop(self.opcode) 384 385 @property 386 def baseopname(self): 387 """Human readable name for the base operation if operation is specialized. 388 389 Otherwise equal to Instruction.opname. 390 """ 391 return opname[self.baseopcode] 392 393 @property 394 def cache_offset(self): 395 """Start index of the cache entries following the operation.""" 396 return self.offset + 2 397 398 @property 399 def end_offset(self): 400 """End index of the cache entries following the operation.""" 401 return self.cache_offset + _get_cache_size(_all_opname[self.opcode])*2 402 403 @property 404 def jump_target(self): 405 """Bytecode index of the jump target if this is a jump operation. 406 407 Otherwise return None. 408 """ 409 return _get_jump_target(self.opcode, self.arg, self.offset) 410 411 @property 412 def is_jump_target(self): 413 """True if other code jumps to here, otherwise False""" 414 return self.label is not None 415 416 def __str__(self): 417 output = io.StringIO() 418 formatter = Formatter(file=output) 419 formatter.print_instruction(self, False) 420 return output.getvalue() 421 422 423class Formatter: 424 425 def __init__(self, file=None, lineno_width=0, offset_width=0, label_width=0, 426 line_offset=0, show_caches=False): 427 """Create a Formatter 428 429 *file* where to write the output 430 *lineno_width* sets the width of the line number field (0 omits it) 431 *offset_width* sets the width of the instruction offset field 432 *label_width* sets the width of the label field 433 *show_caches* is a boolean indicating whether to display cache lines 434 435 """ 436 self.file = file 437 self.lineno_width = lineno_width 438 self.offset_width = offset_width 439 self.label_width = label_width 440 self.show_caches = show_caches 441 442 def print_instruction(self, instr, mark_as_current=False): 443 self.print_instruction_line(instr, 
mark_as_current) 444 if self.show_caches and instr.cache_info: 445 offset = instr.offset 446 for name, size, data in instr.cache_info: 447 for i in range(size): 448 offset += 2 449 # Only show the fancy argrepr for a CACHE instruction when it's 450 # the first entry for a particular cache value: 451 if i == 0: 452 argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}" 453 else: 454 argrepr = "" 455 self.print_instruction_line( 456 Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset, 457 False, None, None, instr.positions), 458 False) 459 460 def print_instruction_line(self, instr, mark_as_current): 461 """Format instruction details for inclusion in disassembly output.""" 462 lineno_width = self.lineno_width 463 offset_width = self.offset_width 464 label_width = self.label_width 465 466 new_source_line = (lineno_width > 0 and 467 instr.starts_line and 468 instr.offset > 0) 469 if new_source_line: 470 print(file=self.file) 471 472 fields = [] 473 # Column: Source code line number 474 if lineno_width: 475 if instr.starts_line: 476 lineno_fmt = "%%%dd" if instr.line_number is not None else "%%%ds" 477 lineno_fmt = lineno_fmt % lineno_width 478 lineno = _NO_LINENO if instr.line_number is None else instr.line_number 479 fields.append(lineno_fmt % lineno) 480 else: 481 fields.append(' ' * lineno_width) 482 # Column: Label 483 if instr.label is not None: 484 lbl = f"L{instr.label}:" 485 fields.append(f"{lbl:>{label_width}}") 486 else: 487 fields.append(' ' * label_width) 488 # Column: Instruction offset from start of code sequence 489 if offset_width > 0: 490 fields.append(f"{repr(instr.offset):>{offset_width}} ") 491 # Column: Current instruction indicator 492 if mark_as_current: 493 fields.append('-->') 494 else: 495 fields.append(' ') 496 # Column: Opcode name 497 fields.append(instr.opname.ljust(_OPNAME_WIDTH)) 498 # Column: Opcode argument 499 if instr.arg is not None: 500 arg = repr(instr.arg) 501 # If opname is longer than _OPNAME_WIDTH, we allow it to 
overflow into 502 # the space reserved for oparg. This results in fewer misaligned opargs 503 # in the disassembly output. 504 opname_excess = max(0, len(instr.opname) - _OPNAME_WIDTH) 505 fields.append(repr(instr.arg).rjust(_OPARG_WIDTH - opname_excess)) 506 # Column: Opcode argument details 507 if instr.argrepr: 508 fields.append('(' + instr.argrepr + ')') 509 print(' '.join(fields).rstrip(), file=self.file) 510 511 def print_exception_table(self, exception_entries): 512 file = self.file 513 if exception_entries: 514 print("ExceptionTable:", file=file) 515 for entry in exception_entries: 516 lasti = " lasti" if entry.lasti else "" 517 start = entry.start_label 518 end = entry.end_label 519 target = entry.target_label 520 print(f" L{start} to L{end} -> L{target} [{entry.depth}]{lasti}", file=file) 521 522 523class ArgResolver: 524 def __init__(self, co_consts=None, names=None, varname_from_oparg=None, labels_map=None): 525 self.co_consts = co_consts 526 self.names = names 527 self.varname_from_oparg = varname_from_oparg 528 self.labels_map = labels_map or {} 529 530 def offset_from_jump_arg(self, op, arg, offset): 531 deop = _deoptop(op) 532 if deop in hasjabs: 533 return arg * 2 534 elif deop in hasjrel: 535 signed_arg = -arg if _is_backward_jump(deop) else arg 536 argval = offset + 2 + signed_arg*2 537 caches = _get_cache_size(_all_opname[deop]) 538 argval += 2 * caches 539 return argval 540 return None 541 542 def get_label_for_offset(self, offset): 543 return self.labels_map.get(offset, None) 544 545 def get_argval_argrepr(self, op, arg, offset): 546 get_name = None if self.names is None else self.names.__getitem__ 547 argval = None 548 argrepr = '' 549 deop = _deoptop(op) 550 if arg is not None: 551 # Set argval to the dereferenced value of the argument when 552 # available, and argrepr to the string representation of argval. 553 # _disassemble_bytes needs the string repr of the 554 # raw name index for LOAD_GLOBAL, LOAD_CONST, etc. 
555 argval = arg 556 if deop in hasconst: 557 argval, argrepr = _get_const_info(deop, arg, self.co_consts) 558 elif deop in hasname: 559 if deop == LOAD_GLOBAL: 560 argval, argrepr = _get_name_info(arg//2, get_name) 561 if (arg & 1) and argrepr: 562 argrepr = f"{argrepr} + NULL" 563 elif deop == LOAD_ATTR: 564 argval, argrepr = _get_name_info(arg//2, get_name) 565 if (arg & 1) and argrepr: 566 argrepr = f"{argrepr} + NULL|self" 567 elif deop == LOAD_SUPER_ATTR: 568 argval, argrepr = _get_name_info(arg//4, get_name) 569 if (arg & 1) and argrepr: 570 argrepr = f"{argrepr} + NULL|self" 571 else: 572 argval, argrepr = _get_name_info(arg, get_name) 573 elif deop in hasjump or deop in hasexc: 574 argval = self.offset_from_jump_arg(op, arg, offset) 575 lbl = self.get_label_for_offset(argval) 576 assert lbl is not None 577 argrepr = f"to L{lbl}" 578 elif deop in (LOAD_FAST_LOAD_FAST, STORE_FAST_LOAD_FAST, STORE_FAST_STORE_FAST): 579 arg1 = arg >> 4 580 arg2 = arg & 15 581 val1, argrepr1 = _get_name_info(arg1, self.varname_from_oparg) 582 val2, argrepr2 = _get_name_info(arg2, self.varname_from_oparg) 583 argrepr = argrepr1 + ", " + argrepr2 584 argval = val1, val2 585 elif deop in haslocal or deop in hasfree: 586 argval, argrepr = _get_name_info(arg, self.varname_from_oparg) 587 elif deop in hascompare: 588 argval = cmp_op[arg >> 5] 589 argrepr = argval 590 if arg & 16: 591 argrepr = f"bool({argrepr})" 592 elif deop == CONVERT_VALUE: 593 argval = (None, str, repr, ascii)[arg] 594 argrepr = ('', 'str', 'repr', 'ascii')[arg] 595 elif deop == SET_FUNCTION_ATTRIBUTE: 596 argrepr = ', '.join(s for i, s in enumerate(FUNCTION_ATTR_FLAGS) 597 if arg & (1<<i)) 598 elif deop == BINARY_OP: 599 _, argrepr = _nb_ops[arg] 600 elif deop == CALL_INTRINSIC_1: 601 argrepr = _intrinsic_1_descs[arg] 602 elif deop == CALL_INTRINSIC_2: 603 argrepr = _intrinsic_2_descs[arg] 604 return argval, argrepr 605 606def get_instructions(x, *, first_line=None, show_caches=None, adaptive=False): 607 
"""Iterator for the opcodes in methods, functions or code 608 609 Generates a series of Instruction named tuples giving the details of 610 each operations in the supplied code. 611 612 If *first_line* is not None, it indicates the line number that should 613 be reported for the first source line in the disassembled code. 614 Otherwise, the source line information (if any) is taken directly from 615 the disassembled code object. 616 """ 617 co = _get_code_object(x) 618 linestarts = dict(findlinestarts(co)) 619 if first_line is not None: 620 line_offset = first_line - co.co_firstlineno 621 else: 622 line_offset = 0 623 624 original_code = co.co_code 625 arg_resolver = ArgResolver(co_consts=co.co_consts, 626 names=co.co_names, 627 varname_from_oparg=co._varname_from_oparg, 628 labels_map=_make_labels_map(original_code)) 629 return _get_instructions_bytes(_get_code_array(co, adaptive), 630 linestarts=linestarts, 631 line_offset=line_offset, 632 co_positions=co.co_positions(), 633 original_code=original_code, 634 arg_resolver=arg_resolver) 635 636def _get_const_value(op, arg, co_consts): 637 """Helper to get the value of the const in a hasconst op. 638 639 Returns the dereferenced constant if this is possible. 640 Otherwise (if it is a LOAD_CONST and co_consts is not 641 provided) returns the dis.UNKNOWN sentinel. 642 """ 643 assert op in hasconst 644 645 argval = UNKNOWN 646 if co_consts is not None: 647 argval = co_consts[arg] 648 return argval 649 650def _get_const_info(op, arg, co_consts): 651 """Helper to get optional details about const references 652 653 Returns the dereferenced constant and its repr if the value 654 can be calculated. 655 Otherwise returns the sentinel value dis.UNKNOWN for the value 656 and an empty string for its repr. 
657 """ 658 argval = _get_const_value(op, arg, co_consts) 659 argrepr = repr(argval) if argval is not UNKNOWN else '' 660 return argval, argrepr 661 662def _get_name_info(name_index, get_name, **extrainfo): 663 """Helper to get optional details about named references 664 665 Returns the dereferenced name as both value and repr if the name 666 list is defined. 667 Otherwise returns the sentinel value dis.UNKNOWN for the value 668 and an empty string for its repr. 669 """ 670 if get_name is not None: 671 argval = get_name(name_index, **extrainfo) 672 return argval, argval 673 else: 674 return UNKNOWN, '' 675 676def _parse_varint(iterator): 677 b = next(iterator) 678 val = b & 63 679 while b&64: 680 val <<= 6 681 b = next(iterator) 682 val |= b&63 683 return val 684 685def _parse_exception_table(code): 686 iterator = iter(code.co_exceptiontable) 687 entries = [] 688 try: 689 while True: 690 start = _parse_varint(iterator)*2 691 length = _parse_varint(iterator)*2 692 end = start + length 693 target = _parse_varint(iterator)*2 694 dl = _parse_varint(iterator) 695 depth = dl >> 1 696 lasti = bool(dl&1) 697 entries.append(_ExceptionTableEntry(start, end, target, depth, lasti)) 698 except StopIteration: 699 return entries 700 701def _is_backward_jump(op): 702 return opname[op] in ('JUMP_BACKWARD', 703 'JUMP_BACKWARD_NO_INTERRUPT') 704 705def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None, 706 original_code=None, arg_resolver=None): 707 """Iterate over the instructions in a bytecode string. 708 709 Generates a sequence of Instruction namedtuples giving the details of each 710 opcode. 
711 712 """ 713 # Use the basic, unadaptive code for finding labels and actually walking the 714 # bytecode, since replacements like ENTER_EXECUTOR and INSTRUMENTED_* can 715 # mess that logic up pretty badly: 716 original_code = original_code or code 717 co_positions = co_positions or iter(()) 718 719 starts_line = False 720 local_line_number = None 721 line_number = None 722 for offset, start_offset, op, arg in _unpack_opargs(original_code): 723 if linestarts is not None: 724 starts_line = offset in linestarts 725 if starts_line: 726 local_line_number = linestarts[offset] 727 if local_line_number is not None: 728 line_number = local_line_number + line_offset 729 else: 730 line_number = None 731 positions = Positions(*next(co_positions, ())) 732 deop = _deoptop(op) 733 op = code[offset] 734 735 if arg_resolver: 736 argval, argrepr = arg_resolver.get_argval_argrepr(op, arg, offset) 737 else: 738 argval, argrepr = arg, repr(arg) 739 740 caches = _get_cache_size(_all_opname[deop]) 741 # Advance the co_positions iterator: 742 for _ in range(caches): 743 next(co_positions, ()) 744 745 if caches: 746 cache_info = [] 747 for name, size in _cache_format[opname[deop]].items(): 748 data = code[offset + 2: offset + 2 + 2 * size] 749 cache_info.append((name, size, data)) 750 else: 751 cache_info = None 752 753 label = arg_resolver.get_label_for_offset(offset) if arg_resolver else None 754 yield Instruction(_all_opname[op], op, arg, argval, argrepr, 755 offset, start_offset, starts_line, line_number, 756 label, positions, cache_info) 757 758 759def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False, 760 show_offsets=False): 761 """Disassemble a code object.""" 762 linestarts = dict(findlinestarts(co)) 763 exception_entries = _parse_exception_table(co) 764 labels_map = _make_labels_map(co.co_code, exception_entries=exception_entries) 765 label_width = 4 + len(str(len(labels_map))) 766 formatter = Formatter(file=file, 767 
lineno_width=_get_lineno_width(linestarts), 768 offset_width=len(str(max(len(co.co_code) - 2, 9999))) if show_offsets else 0, 769 label_width=label_width, 770 show_caches=show_caches) 771 arg_resolver = ArgResolver(co_consts=co.co_consts, 772 names=co.co_names, 773 varname_from_oparg=co._varname_from_oparg, 774 labels_map=labels_map) 775 _disassemble_bytes(_get_code_array(co, adaptive), lasti, linestarts, 776 exception_entries=exception_entries, co_positions=co.co_positions(), 777 original_code=co.co_code, arg_resolver=arg_resolver, formatter=formatter) 778 779def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adaptive=False, show_offsets=False): 780 disassemble(co, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets) 781 if depth is None or depth > 0: 782 if depth is not None: 783 depth = depth - 1 784 for x in co.co_consts: 785 if hasattr(x, 'co_code'): 786 print(file=file) 787 print("Disassembly of %r:" % (x,), file=file) 788 _disassemble_recursive( 789 x, file=file, depth=depth, show_caches=show_caches, 790 adaptive=adaptive, show_offsets=show_offsets 791 ) 792 793 794def _make_labels_map(original_code, exception_entries=()): 795 jump_targets = set(findlabels(original_code)) 796 labels = set(jump_targets) 797 for start, end, target, _, _ in exception_entries: 798 labels.add(start) 799 labels.add(end) 800 labels.add(target) 801 labels = sorted(labels) 802 labels_map = {offset: i+1 for (i, offset) in enumerate(sorted(labels))} 803 for e in exception_entries: 804 e.start_label = labels_map[e.start] 805 e.end_label = labels_map[e.end] 806 e.target_label = labels_map[e.target] 807 return labels_map 808 809_NO_LINENO = ' --' 810 811def _get_lineno_width(linestarts): 812 if linestarts is None: 813 return 0 814 maxlineno = max(filter(None, linestarts.values()), default=-1) 815 if maxlineno == -1: 816 # Omit the line number column entirely if we have no line number info 817 return 0 818 lineno_width = max(3, 
len(str(maxlineno))) 819 if lineno_width < len(_NO_LINENO) and None in linestarts.values(): 820 lineno_width = len(_NO_LINENO) 821 return lineno_width 822 823 824def _disassemble_bytes(code, lasti=-1, linestarts=None, 825 *, line_offset=0, exception_entries=(), 826 co_positions=None, original_code=None, 827 arg_resolver=None, formatter=None): 828 829 assert formatter is not None 830 assert arg_resolver is not None 831 832 instrs = _get_instructions_bytes(code, linestarts=linestarts, 833 line_offset=line_offset, 834 co_positions=co_positions, 835 original_code=original_code, 836 arg_resolver=arg_resolver) 837 838 print_instructions(instrs, exception_entries, formatter, lasti=lasti) 839 840 841def print_instructions(instrs, exception_entries, formatter, lasti=-1): 842 for instr in instrs: 843 # Each CACHE takes 2 bytes 844 is_current_instr = instr.offset <= lasti \ 845 <= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)]) 846 formatter.print_instruction(instr, is_current_instr) 847 848 formatter.print_exception_table(exception_entries) 849 850def _disassemble_str(source, **kwargs): 851 """Compile the source string, then disassemble the code object.""" 852 _disassemble_recursive(_try_compile(source, '<dis>'), **kwargs) 853 854disco = disassemble # XXX For backwards compatibility 855 856 857# Rely on C `int` being 32 bits for oparg 858_INT_BITS = 32 859# Value for c int when it overflows 860_INT_OVERFLOW = 2 ** (_INT_BITS - 1) 861 862def _unpack_opargs(code): 863 extended_arg = 0 864 extended_args_offset = 0 # Number of EXTENDED_ARG instructions preceding the current instruction 865 caches = 0 866 for i in range(0, len(code), 2): 867 # Skip inline CACHE entries: 868 if caches: 869 caches -= 1 870 continue 871 op = code[i] 872 deop = _deoptop(op) 873 caches = _get_cache_size(_all_opname[deop]) 874 if deop in hasarg: 875 arg = code[i+1] | extended_arg 876 extended_arg = (arg << 8) if deop == EXTENDED_ARG else 0 877 # The oparg is stored as a signed 
integer 878 # If the value exceeds its upper limit, it will overflow and wrap 879 # to a negative integer 880 if extended_arg >= _INT_OVERFLOW: 881 extended_arg -= 2 * _INT_OVERFLOW 882 else: 883 arg = None 884 extended_arg = 0 885 if deop == EXTENDED_ARG: 886 extended_args_offset += 1 887 yield (i, i, op, arg) 888 else: 889 start_offset = i - extended_args_offset*2 890 yield (i, start_offset, op, arg) 891 extended_args_offset = 0 892 893def findlabels(code): 894 """Detect all offsets in a byte code which are jump targets. 895 896 Return the list of offsets. 897 898 """ 899 labels = [] 900 for offset, _, op, arg in _unpack_opargs(code): 901 if arg is not None: 902 label = _get_jump_target(op, arg, offset) 903 if label is None: 904 continue 905 if label not in labels: 906 labels.append(label) 907 return labels 908 909def findlinestarts(code): 910 """Find the offsets in a byte code which are start of lines in the source. 911 912 Generate pairs (offset, lineno) 913 lineno will be an integer or None the offset does not have a source line. 914 """ 915 916 lastline = False # None is a valid line number 917 for start, end, line in code.co_lines(): 918 if line is not lastline: 919 lastline = line 920 yield start, line 921 return 922 923def _find_imports(co): 924 """Find import statements in the code 925 926 Generate triplets (name, level, fromlist) where 927 name is the imported module and level, fromlist are 928 the corresponding args to __import__. 
929 """ 930 IMPORT_NAME = opmap['IMPORT_NAME'] 931 932 consts = co.co_consts 933 names = co.co_names 934 opargs = [(op, arg) for _, _, op, arg in _unpack_opargs(co.co_code) 935 if op != EXTENDED_ARG] 936 for i, (op, oparg) in enumerate(opargs): 937 if op == IMPORT_NAME and i >= 2: 938 from_op = opargs[i-1] 939 level_op = opargs[i-2] 940 if (from_op[0] in hasconst and level_op[0] in hasconst): 941 level = _get_const_value(level_op[0], level_op[1], consts) 942 fromlist = _get_const_value(from_op[0], from_op[1], consts) 943 yield (names[oparg], level, fromlist) 944 945def _find_store_names(co): 946 """Find names of variables which are written in the code 947 948 Generate sequence of strings 949 """ 950 STORE_OPS = { 951 opmap['STORE_NAME'], 952 opmap['STORE_GLOBAL'] 953 } 954 955 names = co.co_names 956 for _, _, op, arg in _unpack_opargs(co.co_code): 957 if op in STORE_OPS: 958 yield names[arg] 959 960 961class Bytecode: 962 """The bytecode operations of a piece of code 963 964 Instantiate this with a function, method, other compiled object, string of 965 code, or a code object (as returned by compile()). 966 967 Iterating over this yields the bytecode operations as Instruction instances. 
968 """ 969 def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False, adaptive=False, show_offsets=False): 970 self.codeobj = co = _get_code_object(x) 971 if first_line is None: 972 self.first_line = co.co_firstlineno 973 self._line_offset = 0 974 else: 975 self.first_line = first_line 976 self._line_offset = first_line - co.co_firstlineno 977 self._linestarts = dict(findlinestarts(co)) 978 self._original_object = x 979 self.current_offset = current_offset 980 self.exception_entries = _parse_exception_table(co) 981 self.show_caches = show_caches 982 self.adaptive = adaptive 983 self.show_offsets = show_offsets 984 985 def __iter__(self): 986 co = self.codeobj 987 original_code = co.co_code 988 labels_map = _make_labels_map(original_code, self.exception_entries) 989 arg_resolver = ArgResolver(co_consts=co.co_consts, 990 names=co.co_names, 991 varname_from_oparg=co._varname_from_oparg, 992 labels_map=labels_map) 993 return _get_instructions_bytes(_get_code_array(co, self.adaptive), 994 linestarts=self._linestarts, 995 line_offset=self._line_offset, 996 co_positions=co.co_positions(), 997 original_code=original_code, 998 arg_resolver=arg_resolver) 999 1000 def __repr__(self): 1001 return "{}({!r})".format(self.__class__.__name__, 1002 self._original_object) 1003 1004 @classmethod 1005 def from_traceback(cls, tb, *, show_caches=False, adaptive=False): 1006 """ Construct a Bytecode from the given traceback """ 1007 while tb.tb_next: 1008 tb = tb.tb_next 1009 return cls( 1010 tb.tb_frame.f_code, current_offset=tb.tb_lasti, show_caches=show_caches, adaptive=adaptive 1011 ) 1012 1013 def info(self): 1014 """Return formatted information about the code object.""" 1015 return _format_code_info(self.codeobj) 1016 1017 def dis(self): 1018 """Return a formatted view of the bytecode operations.""" 1019 co = self.codeobj 1020 if self.current_offset is not None: 1021 offset = self.current_offset 1022 else: 1023 offset = -1 1024 with io.StringIO() as output: 
1025 code = _get_code_array(co, self.adaptive) 1026 offset_width = len(str(max(len(code) - 2, 9999))) if self.show_offsets else 0 1027 1028 1029 labels_map = _make_labels_map(co.co_code, self.exception_entries) 1030 label_width = 4 + len(str(len(labels_map))) 1031 formatter = Formatter(file=output, 1032 lineno_width=_get_lineno_width(self._linestarts), 1033 offset_width=offset_width, 1034 label_width=label_width, 1035 line_offset=self._line_offset, 1036 show_caches=self.show_caches) 1037 1038 arg_resolver = ArgResolver(co_consts=co.co_consts, 1039 names=co.co_names, 1040 varname_from_oparg=co._varname_from_oparg, 1041 labels_map=labels_map) 1042 _disassemble_bytes(code, 1043 linestarts=self._linestarts, 1044 line_offset=self._line_offset, 1045 lasti=offset, 1046 exception_entries=self.exception_entries, 1047 co_positions=co.co_positions(), 1048 original_code=co.co_code, 1049 arg_resolver=arg_resolver, 1050 formatter=formatter) 1051 return output.getvalue() 1052 1053 1054def main(): 1055 import argparse 1056 1057 parser = argparse.ArgumentParser() 1058 parser.add_argument('-C', '--show-caches', action='store_true', 1059 help='show inline caches') 1060 parser.add_argument('-O', '--show-offsets', action='store_true', 1061 help='show instruction offsets') 1062 parser.add_argument('infile', nargs='?', default='-') 1063 args = parser.parse_args() 1064 if args.infile == '-': 1065 name = '<stdin>' 1066 source = sys.stdin.buffer.read() 1067 else: 1068 name = args.infile 1069 with open(args.infile, 'rb') as infile: 1070 source = infile.read() 1071 code = compile(source, name, "exec") 1072 dis(code, show_caches=args.show_caches, show_offsets=args.show_offsets) 1073 1074if __name__ == "__main__": 1075 main() 1076