"""Disassembler of Python byte code into mnemonics."""

import sys
import types
import collections
import io

from opcode import *
from opcode import __all__ as _opcodes_all

__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
           "findlinestarts", "findlabels", "show_code",
           "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
del _opcodes_all

# Types of class/module attributes that dis() will try to disassemble
# when it walks an object's __dict__.
_have_code = (types.MethodType, types.FunctionType, types.CodeType,
              classmethod, staticmethod, type)

FORMAT_VALUE = opmap['FORMAT_VALUE']
# Indexed by the low two bits of a FORMAT_VALUE argument; each entry is
# (conversion callable or None, name shown in the disassembly).
FORMAT_VALUE_CONVERTERS = (
    (None, ''),
    (str, 'str'),
    (repr, 'repr'),
    (ascii, 'ascii'),
)
MAKE_FUNCTION = opmap['MAKE_FUNCTION']
# Entry i names bit i of a MAKE_FUNCTION argument.
MAKE_FUNCTION_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure')


def _try_compile(source, name):
    """Attempts to compile the given source, first as an expression and
       then as a statement if the first approach fails.

       Utility function to accept strings in functions that otherwise
       expect code objects

       *name* is passed to compile() as the filename.  A SyntaxError from
       the second ('exec') attempt is not caught and propagates.
    """
    try:
        c = compile(source, name, 'eval')
    except SyntaxError:
        c = compile(source, name, 'exec')
    return c


def dis(x=None, *, file=None, depth=None):
    """Disassemble classes, methods, functions, and other compiled objects.

    With no argument, disassemble the last traceback.

    Compiled objects currently include generator objects, async generator
    objects, and coroutine objects, all of which store their code object
    in a special attribute.

    Raw bytecode (bytes or bytearray) and source code strings are accepted
    as well.  *file* is handed to print() for all output.  *depth* limits
    how far nested code objects are followed (None means no limit); it is
    not used for raw bytecode.

    Raises TypeError if *x* is not something that can be disassembled.
    """
    if x is None:
        distb(file=file)
        return
    # Extract functions from methods.
    if hasattr(x, '__func__'):
        x = x.__func__
    # Extract compiled code objects from...
    if hasattr(x, '__code__'):  # ...a function, or
        x = x.__code__
    elif hasattr(x, 'gi_code'):  # ...a generator object, or
        x = x.gi_code
    elif hasattr(x, 'ag_code'):  # ...an asynchronous generator object, or
        x = x.ag_code
    elif hasattr(x, 'cr_code'):  # ...a coroutine.
        x = x.cr_code
    # Perform the disassembly.
    if hasattr(x, '__dict__'):  # Class or module
        # Sorted so the members are always reported in the same order.
        items = sorted(x.__dict__.items())
        for name, x1 in items:
            if isinstance(x1, _have_code):
                print("Disassembly of %s:" % name, file=file)
                try:
                    dis(x1, file=file, depth=depth)
                except TypeError as msg:
                    # Report the member that failed and keep going with
                    # the remaining ones instead of aborting the listing.
                    print("Sorry:", msg, file=file)
                print(file=file)
    elif hasattr(x, 'co_code'):  # Code object
        _disassemble_recursive(x, file=file, depth=depth)
    elif isinstance(x, (bytes, bytearray)):  # Raw bytecode
        _disassemble_bytes(x, file=file)
    elif isinstance(x, str):  # Source code
        _disassemble_str(x, file=file, depth=depth)
    else:
        raise TypeError("don't know how to disassemble %s objects" %
                        type(x).__name__)


def distb(tb=None, *, file=None):
    """Disassemble a traceback (default: last traceback).

    The innermost frame of the traceback is disassembled, with the
    instruction at its tb_lasti offset marked as current.

    Raises RuntimeError if *tb* is None and sys.last_traceback is not set.
    """
    if tb is None:
        try:
            tb = sys.last_traceback
        except AttributeError:
            raise RuntimeError("no last traceback to disassemble") from None
    # Walk down to the last entry in the traceback chain.
    while tb.tb_next:
        tb = tb.tb_next
    disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file)

# The inspect module interrogates this dictionary to build its
# list of CO_* constants. It is also used by pretty_flags to
# turn the co_flags field into a human readable list.
COMPILER_FLAG_NAMES = {
    1: "OPTIMIZED",
    2: "NEWLOCALS",
    4: "VARARGS",
    8: "VARKEYWORDS",
    16: "NESTED",
    32: "GENERATOR",
    64: "NOFREE",
    128: "COROUTINE",
    256: "ITERABLE_COROUTINE",
    512: "ASYNC_GENERATOR",
}


def pretty_flags(flags):
    """Return pretty representation of code flags.

    Each set bit among the low 32 is rendered by its name from
    COMPILER_FLAG_NAMES, or as a hex literal when it has no name.
    Whatever is left once those 32 bits have been examined without
    exhausting *flags* (this includes an initial value of 0) is appended
    as a single hex literal.
    """
    names = []
    for i in range(32):
        flag = 1 << i
        if flags & flag:
            names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag)))
            flags ^= flag
            if not flags:
                break
    else:
        # Only reached when the loop was never broken out of.
        names.append(hex(flags))
    return ", ".join(names)


def _get_code_object(x):
    """Helper to handle methods, compiled or raw code objects, and strings.

    Returns an object with a co_code attribute; raises TypeError when
    none can be obtained from *x*.
    """
    # Extract functions from methods.
    if hasattr(x, '__func__'):
        x = x.__func__
    # Extract compiled code objects from...
    if hasattr(x, '__code__'):  # ...a function, or
        x = x.__code__
    elif hasattr(x, 'gi_code'):  # ...a generator object, or
        x = x.gi_code
    elif hasattr(x, 'ag_code'):  # ...an asynchronous generator object, or
        x = x.ag_code
    elif hasattr(x, 'cr_code'):  # ...a coroutine.
        x = x.cr_code
    # Handle source code.
    if isinstance(x, str):
        x = _try_compile(x, "<disassembly>")
    # By now, if we don't have a code object, we can't disassemble x.
    if hasattr(x, 'co_code'):
        return x
    raise TypeError("don't know how to disassemble %s objects" %
                    type(x).__name__)


def code_info(x):
    """Formatted details of methods, functions, or code."""
    return _format_code_info(_get_code_object(x))


def _format_code_info(co):
    """Return a multi-line summary of the code object *co*.

    The scalar fields always appear; each name/constant table is listed
    only when it is non-empty, one "index: value" row per entry.
    """
    lines = [
        "Name: %s" % co.co_name,
        "Filename: %s" % co.co_filename,
        "Argument count: %s" % co.co_argcount,
        "Positional-only arguments: %s" % co.co_posonlyargcount,
        "Kw-only arguments: %s" % co.co_kwonlyargcount,
        "Number of locals: %s" % co.co_nlocals,
        "Stack size: %s" % co.co_stacksize,
        "Flags: %s" % pretty_flags(co.co_flags),
    ]
    # (heading, entries, row format); constants are shown with repr(),
    # names as plain strings.
    sections = (
        ("Constants:", co.co_consts, "%4d: %r"),
        ("Names:", co.co_names, "%4d: %s"),
        ("Variable names:", co.co_varnames, "%4d: %s"),
        ("Free variables:", co.co_freevars, "%4d: %s"),
        ("Cell variables:", co.co_cellvars, "%4d: %s"),
    )
    for heading, entries, row_fmt in sections:
        if entries:
            lines.append(heading)
            lines.extend(row_fmt % pair for pair in enumerate(entries))
    return "\n".join(lines)


def show_code(co, *, file=None):
    """Print details of methods, functions, or code to *file*.

    If *file* is not provided, the output is printed on stdout.
    """
    print(code_info(co), file=file)


_Instruction = collections.namedtuple(
    "_Instruction",
    "opname opcode arg argval argrepr offset starts_line is_jump_target")

_Instruction.opname.__doc__ = "Human readable name for operation"
_Instruction.opcode.__doc__ = "Numeric code for operation"
_Instruction.arg.__doc__ = "Numeric argument to operation (if any), otherwise None"
_Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
_Instruction.argrepr.__doc__ = "Human readable description of operation argument"
_Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
_Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None"
_Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False"

_OPNAME_WIDTH = 20
_OPARG_WIDTH = 5


class Instruction(_Instruction):
    """Details for a bytecode operation

       Defined fields:
         opname - human readable name for operation
         opcode - numeric code for operation
         arg - numeric argument to operation (if any), otherwise None
         argval - resolved arg value (if known), otherwise same as arg
         argrepr - human readable description of operation argument
         offset - start index of operation within bytecode sequence
         starts_line - line started by this opcode (if any), otherwise None
         is_jump_target - True if other code jumps to here, otherwise False
    """

    def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
        """Format instruction details for inclusion in disassembly output

        *lineno_width* sets the width of the line number field (0 omits it)
        *mark_as_current* inserts a '-->' marker arrow as part of the line
        *offset_width* sets the width of the instruction offset field

        Returns the formatted line with trailing whitespace removed.
        """
        fields = []
        # Column: Source code line number
        if lineno_width:
            if self.starts_line is not None:
                lineno_fmt = "%%%dd" % lineno_width
                fields.append(lineno_fmt % self.starts_line)
            else:
                fields.append(' ' * lineno_width)
        # Column: Current instruction indicator
        if mark_as_current:
            fields.append('-->')
        else:
            # Same width as '-->' so later columns stay aligned.
            fields.append('   ')
        # Column: Jump target marker
        if self.is_jump_target:
            fields.append('>>')
        else:
            # Same width as '>>' so later columns stay aligned.
            fields.append('  ')
        # Column: Instruction offset from start of code sequence
        fields.append(repr(self.offset).rjust(offset_width))
        # Column: Opcode name
        fields.append(self.opname.ljust(_OPNAME_WIDTH))
        # Column: Opcode argument
        if self.arg is not None:
            fields.append(repr(self.arg).rjust(_OPARG_WIDTH))
            # Column: Opcode argument details
            if self.argrepr:
                fields.append('(' + self.argrepr + ')')
        return ' '.join(fields).rstrip()


def get_instructions(x, *, first_line=None):
    """Iterator for the opcodes in methods, functions or code

    Generates a series of Instruction named tuples giving the details of
    each operations in the supplied code.

    If *first_line* is not None, it indicates the line number that should
    be reported for the first source line in the disassembled code.
    Otherwise, the source line information (if any) is taken directly from
    the disassembled code object.
    """
    co = _get_code_object(x)
    cell_names = co.co_cellvars + co.co_freevars
    linestarts = dict(findlinestarts(co))
    if first_line is not None:
        line_offset = first_line - co.co_firstlineno
    else:
        line_offset = 0
    return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
                                   co.co_consts, cell_names, linestarts,
                                   line_offset)


def _get_const_info(const_index, const_list):
    """Helper to get optional details about const references

       Returns the dereferenced constant and its repr if the constant
       list is defined.
       Otherwise returns the constant index and its repr().
    """
    argval = const_index
    if const_list is not None:
        argval = const_list[const_index]
    return argval, repr(argval)


def _get_name_info(name_index, name_list):
    """Helper to get optional details about named references

       Returns the dereferenced name as both value and repr if the name
       list is defined.
       Otherwise returns the name index and its repr().
    """
    argval = name_index
    if name_list is not None:
        argval = name_list[name_index]
        argrepr = argval
    else:
        argrepr = repr(argval)
    return argval, argrepr


def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                            cells=None, linestarts=None, line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.  Additional information about the code's runtime environment
    (e.g. variable names, constants) can be specified using optional
    arguments.

    *linestarts*, when given, maps instruction offsets to line numbers;
    *line_offset* is added to every line number taken from it.
    """
    # A set makes the per-instruction membership test below constant time.
    labels = set(findlabels(code))
    starts_line = None
    for offset, op, arg in _unpack_opargs(code):
        if linestarts is not None:
            starts_line = linestarts.get(offset, None)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = offset in labels
        argval = None
        argrepr = ''
        if arg is not None:
            #  Set argval to the dereferenced value of the argument when
            #  available, and argrepr to the string representation of argval.
            #    _disassemble_bytes needs the string repr of the
            #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
            if op in hasconst:
                argval, argrepr = _get_const_info(arg, constants)
            elif op in hasname:
                argval, argrepr = _get_name_info(arg, names)
            elif op in hasjrel:
                # Relative jumps are counted from the following instruction.
                argval = offset + 2 + arg
                argrepr = "to " + repr(argval)
            elif op in haslocal:
                argval, argrepr = _get_name_info(arg, varnames)
            elif op in hascompare:
                argval = cmp_op[arg]
                argrepr = argval
            elif op in hasfree:
                argval, argrepr = _get_name_info(arg, cells)
            elif op == FORMAT_VALUE:
                # Low two bits pick the conversion; bit 0x4 flags a
                # format spec.
                argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x3]
                argval = (argval, bool(arg & 0x4))
                if argval[1]:
                    if argrepr:
                        argrepr += ', '
                    argrepr += 'with format'
            elif op == MAKE_FUNCTION:
                argrepr = ', '.join(s for i, s in enumerate(MAKE_FUNCTION_FLAGS)
                                    if arg & (1 << i))
        yield Instruction(opname[op], op,
                          arg, argval, argrepr,
                          offset, starts_line, is_jump_target)


def disassemble(co, lasti=-1, *, file=None):
    """Disassemble a code object.

    The instruction at offset *lasti* (if any) is marked as current;
    output goes to *file* via print().
    """
    cell_names = co.co_cellvars + co.co_freevars
    linestarts = dict(findlinestarts(co))
    _disassemble_bytes(co.co_code, lasti, co.co_varnames, co.co_names,
                       co.co_consts, cell_names, linestarts, file=file)


def _disassemble_recursive(co, *, file=None, depth=None):
    """Disassemble *co*, then the code objects found among its constants.

    *depth* is the number of nesting levels still to follow; None means
    no limit.
    """
    disassemble(co, file=file)
    if depth is None or depth > 0:
        if depth is not None:
            depth = depth - 1
        for x in co.co_consts:
            if hasattr(x, 'co_code'):
                print(file=file)
                print("Disassembly of %r:" % (x,), file=file)
                _disassemble_recursive(x, file=file, depth=depth)


def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
                       constants=None, cells=None, linestarts=None,
                       *, file=None, line_offset=0):
    """Print one formatted line per instruction in *code* to *file*.

    The instruction whose offset equals *lasti* is marked as current.
    The remaining arguments are forwarded to _get_instructions_bytes().
    """
    # Omit the line number column entirely if we have no line number info
    show_lineno = linestarts is not None
    if show_lineno:
        # default=0 keeps an empty mapping from raising ValueError.
        maxlineno = max(linestarts.values(), default=0) + line_offset
        if maxlineno >= 1000:
            lineno_width = len(str(maxlineno))
        else:
            lineno_width = 3
    else:
        lineno_width = 0
    maxoffset = len(code) - 2
    if maxoffset >= 10000:
        offset_width = len(str(maxoffset))
    else:
        offset_width = 4
    for instr in _get_instructions_bytes(code, varnames, names,
                                         constants, cells, linestarts,
                                         line_offset=line_offset):
        # A blank line separates source lines, except before the first.
        new_source_line = (show_lineno and
                           instr.starts_line is not None and
                           instr.offset > 0)
        if new_source_line:
            print(file=file)
        is_current_instr = instr.offset == lasti
        print(instr._disassemble(lineno_width, is_current_instr, offset_width),
              file=file)


def _disassemble_str(source, **kwargs):
    """Compile the source string, then disassemble the code object."""
    _disassemble_recursive(_try_compile(source, '<dis>'), **kwargs)


disco = disassemble                     # XXX For backwards compatibility


def _unpack_opargs(code):
    """Yield (offset, opcode, arg) for each two-byte unit of *code*.

    *arg* is None for opcodes below HAVE_ARGUMENT.  The argument of an
    EXTENDED_ARG is shifted left by eight bits and OR-ed into the
    argument of the opcode that follows it.
    """
    extended_arg = 0
    for i in range(0, len(code), 2):
        op = code[i]
        if op >= HAVE_ARGUMENT:
            arg = code[i+1] | extended_arg
            extended_arg = (arg << 8) if op == EXTENDED_ARG else 0
        else:
            arg = None
        yield (i, op, arg)


def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.

    Return the list of offsets, without duplicates, in the order in which
    they are first encountered.

    """
    labels = []
    seen = set()  # constant-time duplicate check; the list keeps the order
    for offset, op, arg in _unpack_opargs(code):
        if arg is not None:
            if op in hasjrel:
                label = offset + 2 + arg
            elif op in hasjabs:
                label = arg
            else:
                continue
            if label not in seen:
                seen.add(label)
                labels.append(label)
    return labels


def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.

    Generate pairs (offset, lineno) as described in Python/compile.c.

    *code* must provide co_lnotab, co_code and co_firstlineno.
    """
    # co_lnotab alternates (byte increment, line increment) entries.
    byte_increments = code.co_lnotab[0::2]
    line_increments = code.co_lnotab[1::2]
    bytecode_len = len(code.co_code)

    lastlineno = None
    lineno = code.co_firstlineno
    addr = 0
    for byte_incr, line_incr in zip(byte_increments, line_increments):
        if byte_incr:
            if lineno != lastlineno:
                yield (addr, lineno)
                lastlineno = lineno
            addr += byte_incr
            if addr >= bytecode_len:
                # The rest of the lnotab byte offsets are past the end of
                # the bytecode, so the lines were optimized away.
                return
        if line_incr >= 0x80:
            # line_increments is an array of 8-bit signed integers
            line_incr -= 0x100
        lineno += line_incr
    if lineno != lastlineno:
        yield (addr, lineno)


class Bytecode:
    """The bytecode operations of a piece of code

    Instantiate this with a function, method, other compiled object, string of
    code, or a code object (as returned by compile()).

    Iterating over this yields the bytecode operations as Instruction instances.
    """
    def __init__(self, x, *, first_line=None, current_offset=None):
        """Wrap the code object obtained from *x*.

        *first_line* overrides the line number reported for the first
        source line; *current_offset* is the instruction offset that
        dis() marks as current.
        """
        self.codeobj = co = _get_code_object(x)
        if first_line is None:
            self.first_line = co.co_firstlineno
            self._line_offset = 0
        else:
            self.first_line = first_line
            self._line_offset = first_line - co.co_firstlineno
        self._cell_names = co.co_cellvars + co.co_freevars
        self._linestarts = dict(findlinestarts(co))
        self._original_object = x
        self.current_offset = current_offset

    def __iter__(self):
        co = self.codeobj
        return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
                                       co.co_consts, self._cell_names,
                                       self._linestarts,
                                       line_offset=self._line_offset)

    def __repr__(self):
        return "{}({!r})".format(self.__class__.__name__,
                                 self._original_object)

    @classmethod
    def from_traceback(cls, tb):
        """ Construct a Bytecode from the given traceback """
        # Use the last entry in the traceback chain.
        while tb.tb_next:
            tb = tb.tb_next
        return cls(tb.tb_frame.f_code, current_offset=tb.tb_lasti)

    def info(self):
        """Return formatted information about the code object."""
        return _format_code_info(self.codeobj)

    def dis(self):
        """Return a formatted view of the bytecode operations."""
        co = self.codeobj
        if self.current_offset is not None:
            offset = self.current_offset
        else:
            # -1 never equals an instruction offset, so nothing is marked.
            offset = -1
        with io.StringIO() as output:
            _disassemble_bytes(co.co_code, varnames=co.co_varnames,
                               names=co.co_names, constants=co.co_consts,
                               cells=self._cell_names,
                               linestarts=self._linestarts,
                               line_offset=self._line_offset,
                               file=output,
                               lasti=offset)
            return output.getvalue()


def _test():
    """Simple test program to disassemble a file."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('infile', type=argparse.FileType('rb'), nargs='?', default='-')
    args = parser.parse_args()
    with args.infile as infile:
        source = infile.read()
    code = compile(source, args.infile.name, "exec")
    dis(code)


if __name__ == "__main__":
    _test()