"""Disassembler of Python byte code into mnemonics."""

import sys
import types
import collections
import io

from opcode import *
from opcode import __all__ as _opcodes_all

__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
           "findlinestarts", "findlabels", "show_code",
           "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
del _opcodes_all

# Types of namespace entries that dis() will descend into when it is handed
# an object with a __dict__ (a class or a module).
_have_code = (types.MethodType, types.FunctionType, types.CodeType,
              classmethod, staticmethod, type)

FORMAT_VALUE = opmap['FORMAT_VALUE']


def _try_compile(source, name):
    """Attempts to compile the given source, first as an expression and
       then as a statement if the first approach fails.

       Utility function to accept strings in functions that otherwise
       expect code objects
    """
    try:
        c = compile(source, name, 'eval')
    except SyntaxError:
        c = compile(source, name, 'exec')
    return c


def _unwrap_code_owner(x):
    """Return the code object carried by *x*, or *x* itself if it has none.

    Shared by dis() and _get_code_object() so both peel wrappers in exactly
    the same order.
    """
    # Extract functions from methods.
    if hasattr(x, '__func__'):
        x = x.__func__
    # Extract compiled code objects from...
    if hasattr(x, '__code__'):     # ...a function, or
        x = x.__code__
    elif hasattr(x, 'gi_code'):    # ...a generator object, or
        x = x.gi_code
    elif hasattr(x, 'ag_code'):    # ...an asynchronous generator object, or
        x = x.ag_code
    elif hasattr(x, 'cr_code'):    # ...a coroutine.
        x = x.cr_code
    return x


def dis(x=None, *, file=None, depth=None):
    """Disassemble classes, methods, functions, and other compiled objects.

    With no argument, disassemble the last traceback.

    Compiled objects currently include generator objects, async generator
    objects, and coroutine objects, all of which store their code object
    in a special attribute.

    *file* is passed to print() for all output.  *depth* limits recursion
    into nested code objects (None means unlimited); it is ignored for raw
    bytecode.

    Raises TypeError if *x* is not something that can be disassembled.
    """
    if x is None:
        distb(file=file)
        return
    x = _unwrap_code_owner(x)
    # Perform the disassembly.
    if hasattr(x, '__dict__'):  # Class or module
        items = sorted(x.__dict__.items())
        for name, x1 in items:
            if isinstance(x1, _have_code):
                print("Disassembly of %s:" % name, file=file)
                try:
                    dis(x1, file=file, depth=depth)
                except TypeError as msg:
                    # Best effort: report the member and keep going.
                    print("Sorry:", msg, file=file)
                print(file=file)
    elif hasattr(x, 'co_code'):  # Code object
        _disassemble_recursive(x, file=file, depth=depth)
    elif isinstance(x, (bytes, bytearray)):  # Raw bytecode
        _disassemble_bytes(x, file=file)
    elif isinstance(x, str):    # Source code
        _disassemble_str(x, file=file, depth=depth)
    else:
        raise TypeError("don't know how to disassemble %s objects" %
                        type(x).__name__)


def distb(tb=None, *, file=None):
    """Disassemble a traceback (default: last traceback).

    Raises RuntimeError if *tb* is None and sys.last_traceback is not set.
    """
    if tb is None:
        try:
            tb = sys.last_traceback
        except AttributeError:
            raise RuntimeError("no last traceback to disassemble") from None
    # Walk to the innermost frame of the traceback chain.
    while tb.tb_next:
        tb = tb.tb_next
    disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file)


# The inspect module interrogates this dictionary to build its
# list of CO_* constants. It is also used by pretty_flags to
# turn the co_flags field into a human readable list.
COMPILER_FLAG_NAMES = {
    1: "OPTIMIZED",
    2: "NEWLOCALS",
    4: "VARARGS",
    8: "VARKEYWORDS",
    16: "NESTED",
    32: "GENERATOR",
    64: "NOFREE",
    128: "COROUTINE",
    256: "ITERABLE_COROUTINE",
    512: "ASYNC_GENERATOR",
}


def pretty_flags(flags):
    """Return pretty representation of code flags.

    Each set bit among the low 32 is rendered by name if it appears in
    COMPILER_FLAG_NAMES, otherwise as hex.  Whatever is left once those
    32 bits have been examined is appended as a single hex value.
    """
    names = []
    for i in range(32):
        flag = 1 << i
        if flags & flag:
            names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag)))
            flags ^= flag
            if not flags:
                break
    else:
        # Loop ran to completion without clearing every bit.
        names.append(hex(flags))
    return ", ".join(names)


def _get_code_object(x):
    """Helper to handle methods, compiled or raw code objects, and strings.

    Strings are compiled with the filename "<disassembly>".  Raises
    TypeError if no code object can be obtained from *x*.
    """
    x = _unwrap_code_owner(x)
    # Handle source code.
    if isinstance(x, str):
        x = _try_compile(x, "<disassembly>")
    # By now, if we don't have a code object, we can't disassemble x.
    if hasattr(x, 'co_code'):
        return x
    raise TypeError("don't know how to disassemble %s objects" %
                    type(x).__name__)


def code_info(x):
    """Formatted details of methods, functions, or code."""
    return _format_code_info(_get_code_object(x))


def _format_code_info(co):
    """Return a multi-line summary of the code object *co*."""
    lines = [
        "Name: %s" % co.co_name,
        "Filename: %s" % co.co_filename,
        "Argument count: %s" % co.co_argcount,
        "Kw-only arguments: %s" % co.co_kwonlyargcount,
        "Number of locals: %s" % co.co_nlocals,
        "Stack size: %s" % co.co_stacksize,
        "Flags: %s" % pretty_flags(co.co_flags),
    ]
    # (heading, values, per-entry format); a section is omitted when empty.
    # Constants are shown with repr(), every kind of name with str().
    sections = (
        ("Constants:", co.co_consts, "%4d: %r"),
        ("Names:", co.co_names, "%4d: %s"),
        ("Variable names:", co.co_varnames, "%4d: %s"),
        ("Free variables:", co.co_freevars, "%4d: %s"),
        ("Cell variables:", co.co_cellvars, "%4d: %s"),
    )
    for heading, values, entry_fmt in sections:
        if values:
            lines.append(heading)
            lines.extend(entry_fmt % pair for pair in enumerate(values))
    return "\n".join(lines)


def show_code(co, *, file=None):
    """Print details of methods, functions, or code to *file*.

    If *file* is not provided, the output is printed on stdout.
    """
    print(code_info(co), file=file)


_Instruction = collections.namedtuple("_Instruction",
     "opname opcode arg argval argrepr offset starts_line is_jump_target")

_Instruction.opname.__doc__ = "Human readable name for operation"
_Instruction.opcode.__doc__ = "Numeric code for operation"
_Instruction.arg.__doc__ = "Numeric argument to operation (if any), otherwise None"
_Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
_Instruction.argrepr.__doc__ = "Human readable description of operation argument"
_Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
_Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None"
_Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False"

_OPNAME_WIDTH = 20
_OPARG_WIDTH = 5


class Instruction(_Instruction):
    """Details for a bytecode operation

       Defined fields:
         opname - human readable name for operation
         opcode - numeric code for operation
         arg - numeric argument to operation (if any), otherwise None
         argval - resolved arg value (if known), otherwise same as arg
         argrepr - human readable description of operation argument
         offset - start index of operation within bytecode sequence
         starts_line - line started by this opcode (if any), otherwise None
         is_jump_target - True if other code jumps to here, otherwise False
    """

    def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
        """Format instruction details for inclusion in disassembly output

        *lineno_width* sets the width of the line number field (0 omits it)
        *mark_as_current* inserts a '-->' marker arrow as part of the line
        *offset_width* sets the width of the instruction offset field
        """
        fields = []
        # Column: Source code line number
        if lineno_width:
            if self.starts_line is not None:
                lineno_fmt = "%%%dd" % lineno_width
                fields.append(lineno_fmt % self.starts_line)
            else:
                fields.append(' ' * lineno_width)
        # Column: Current instruction indicator
        # The placeholder is as wide as the marker so that the columns to
        # its right stay aligned whether or not the marker is shown.
        if mark_as_current:
            fields.append('-->')
        else:
            fields.append(' ' * len('-->'))
        # Column: Jump target marker (same fixed-width rule as above)
        if self.is_jump_target:
            fields.append('>>')
        else:
            fields.append(' ' * len('>>'))
        # Column: Instruction offset from start of code sequence
        fields.append(repr(self.offset).rjust(offset_width))
        # Column: Opcode name
        fields.append(self.opname.ljust(_OPNAME_WIDTH))
        # Column: Opcode argument
        if self.arg is not None:
            fields.append(repr(self.arg).rjust(_OPARG_WIDTH))
            # Column: Opcode argument details
            if self.argrepr:
                fields.append('(' + self.argrepr + ')')
        return ' '.join(fields).rstrip()


def get_instructions(x, *, first_line=None):
    """Iterator for the opcodes in methods, functions or code

    Generates a series of Instruction named tuples giving the details of
    each operations in the supplied code.

    If *first_line* is not None, it indicates the line number that should
    be reported for the first source line in the disassembled code.
    Otherwise, the source line information (if any) is taken directly from
    the disassembled code object.
    """
    co = _get_code_object(x)
    cell_names = co.co_cellvars + co.co_freevars
    linestarts = dict(findlinestarts(co))
    if first_line is not None:
        line_offset = first_line - co.co_firstlineno
    else:
        line_offset = 0
    return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
                                   co.co_consts, cell_names, linestarts,
                                   line_offset)


def _get_const_info(const_index, const_list):
    """Helper to get optional details about const references

       Returns the dereferenced constant and its repr if the constant
       list is defined.
       Otherwise returns the constant index and its repr().
    """
    argval = const_index
    if const_list is not None:
        argval = const_list[const_index]
    return argval, repr(argval)


def _get_name_info(name_index, name_list):
    """Helper to get optional details about named references

       Returns the dereferenced name as both value and repr if the name
       list is defined.
       Otherwise returns the name index and its repr().
    """
    argval = name_index
    if name_list is not None:
        argval = name_list[name_index]
        argrepr = argval
    else:
        argrepr = repr(argval)
    return argval, argrepr


def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                            cells=None, linestarts=None, line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.  Additional information about the code's runtime environment
    (e.g. variable names, constants) can be specified using optional
    arguments.

    """
    # A set gives constant-time membership tests for is_jump_target below.
    labels = set(findlabels(code))
    starts_line = None
    for offset, op, arg in _unpack_opargs(code):
        if linestarts is not None:
            starts_line = linestarts.get(offset, None)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = offset in labels
        argval = None
        argrepr = ''
        if arg is not None:
            #  Set argval to the dereferenced value of the argument when
            #  available, and argrepr to the string representation of argval.
            #    _disassemble_bytes needs the string repr of the
            #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
            if op in hasconst:
                argval, argrepr = _get_const_info(arg, constants)
            elif op in hasname:
                argval, argrepr = _get_name_info(arg, names)
            elif op in hasjrel:
                # Relative jumps are measured from the next instruction.
                argval = offset + 2 + arg
                argrepr = "to " + repr(argval)
            elif op in haslocal:
                argval, argrepr = _get_name_info(arg, varnames)
            elif op in hascompare:
                argval = cmp_op[arg]
                argrepr = argval
            elif op in hasfree:
                argval, argrepr = _get_name_info(arg, cells)
            elif op == FORMAT_VALUE:
                # Low two bits select the conversion; bit 0x4 flags a
                # format spec.
                argval = ((None, str, repr, ascii)[arg & 0x3], bool(arg & 0x4))
                argrepr = ('', 'str', 'repr', 'ascii')[arg & 0x3]
                if argval[1]:
                    if argrepr:
                        argrepr += ', '
                    argrepr += 'with format'
        yield Instruction(opname[op], op,
                          arg, argval, argrepr,
                          offset, starts_line, is_jump_target)


def disassemble(co, lasti=-1, *, file=None):
    """Disassemble a code object.

    The instruction whose offset equals *lasti* is marked as current.
    """
    cell_names = co.co_cellvars + co.co_freevars
    linestarts = dict(findlinestarts(co))
    _disassemble_bytes(co.co_code, lasti, co.co_varnames, co.co_names,
                       co.co_consts, cell_names, linestarts, file=file)


def _disassemble_recursive(co, *, file=None, depth=None):
    """Disassemble *co*, then any code objects among its constants.

    *depth* of None means unlimited nesting; otherwise recursion stops
    once it has been counted down to zero.
    """
    disassemble(co, file=file)
    if depth is None or depth > 0:
        if depth is not None:
            depth = depth - 1
        for x in co.co_consts:
            if hasattr(x, 'co_code'):
                print(file=file)
                print("Disassembly of %r:" % (x,), file=file)
                _disassemble_recursive(x, file=file, depth=depth)


def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
                       constants=None, cells=None, linestarts=None,
                       *, file=None, line_offset=0):
    """Print a disassembly of the raw bytecode *code* to *file*."""
    # Omit the line number column entirely if we have no line number info.
    # An empty mapping counts as "no info": max() below needs at least one
    # entry.
    show_lineno = bool(linestarts)
    if show_lineno:
        maxlineno = max(linestarts.values()) + line_offset
        if maxlineno >= 1000:
            lineno_width = len(str(maxlineno))
        else:
            lineno_width = 3
    else:
        lineno_width = 0
    maxoffset = len(code) - 2
    if maxoffset >= 10000:
        offset_width = len(str(maxoffset))
    else:
        offset_width = 4
    for instr in _get_instructions_bytes(code, varnames, names,
                                         constants, cells, linestarts,
                                         line_offset=line_offset):
        # Separate source lines with a blank line, except before the first.
        new_source_line = (show_lineno and
                           instr.starts_line is not None and
                           instr.offset > 0)
        if new_source_line:
            print(file=file)
        is_current_instr = instr.offset == lasti
        print(instr._disassemble(lineno_width, is_current_instr, offset_width),
              file=file)


def _disassemble_str(source, **kwargs):
    """Compile the source string, then disassemble the code object."""
    _disassemble_recursive(_try_compile(source, '<dis>'), **kwargs)


disco = disassemble                     # XXX For backwards compatibility


def _unpack_opargs(code):
    """Yield (offset, opcode, arg) for each 2-byte unit of *code*.

    *arg* is None for opcodes below HAVE_ARGUMENT.  The argument of an
    EXTENDED_ARG is shifted left 8 bits and OR-ed into the argument of the
    instruction that follows it.
    """
    extended_arg = 0
    for i in range(0, len(code), 2):
        op = code[i]
        if op >= HAVE_ARGUMENT:
            arg = code[i+1] | extended_arg
            extended_arg = (arg << 8) if op == EXTENDED_ARG else 0
        else:
            arg = None
        yield (i, op, arg)


def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.

    Return the list of offsets, without duplicates, in order of first
    appearance.

    """
    labels = []
    seen = set()    # mirrors *labels* for constant-time duplicate checks
    for offset, op, arg in _unpack_opargs(code):
        if arg is None:
            continue
        if op in hasjrel:
            label = offset + 2 + arg
        elif op in hasjabs:
            label = arg
        else:
            continue
        if label not in seen:
            seen.add(label)
            labels.append(label)
    return labels


def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.

    Generate pairs (offset, lineno) as described in Python/compile.c.

    """
    # co_lnotab alternates (bytecode offset increment, line increment).
    byte_increments = code.co_lnotab[0::2]
    line_increments = code.co_lnotab[1::2]

    lastlineno = None
    lineno = code.co_firstlineno
    addr = 0
    for byte_incr, line_incr in zip(byte_increments, line_increments):
        if byte_incr:
            if lineno != lastlineno:
                yield (addr, lineno)
                lastlineno = lineno
            addr += byte_incr
        if line_incr >= 0x80:
            # line_increments is an array of 8-bit signed integers
            line_incr -= 0x100
        lineno += line_incr
    if lineno != lastlineno:
        yield (addr, lineno)


class Bytecode:
    """The bytecode operations of a piece of code

    Instantiate this with a function, method, other compiled object, string of
    code, or a code object (as returned by compile()).

    Iterating over this yields the bytecode operations as Instruction instances.
    """
    def __init__(self, x, *, first_line=None, current_offset=None):
        self.codeobj = co = _get_code_object(x)
        if first_line is None:
            self.first_line = co.co_firstlineno
            self._line_offset = 0
        else:
            self.first_line = first_line
            self._line_offset = first_line - co.co_firstlineno
        self._cell_names = co.co_cellvars + co.co_freevars
        self._linestarts = dict(findlinestarts(co))
        self._original_object = x
        self.current_offset = current_offset

    def __iter__(self):
        co = self.codeobj
        return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
                                       co.co_consts, self._cell_names,
                                       self._linestarts,
                                       line_offset=self._line_offset)

    def __repr__(self):
        return "{}({!r})".format(self.__class__.__name__,
                                 self._original_object)

    @classmethod
    def from_traceback(cls, tb):
        """ Construct a Bytecode from the given traceback """
        while tb.tb_next:
            tb = tb.tb_next
        return cls(tb.tb_frame.f_code, current_offset=tb.tb_lasti)

    def info(self):
        """Return formatted information about the code object."""
        return _format_code_info(self.codeobj)

    def dis(self):
        """Return a formatted view of the bytecode operations."""
        co = self.codeobj
        if self.current_offset is not None:
            offset = self.current_offset
        else:
            offset = -1
        with io.StringIO() as output:
            _disassemble_bytes(co.co_code, varnames=co.co_varnames,
                               names=co.co_names, constants=co.co_consts,
                               cells=self._cell_names,
                               linestarts=self._linestarts,
                               line_offset=self._line_offset,
                               file=output,
                               lasti=offset)
            return output.getvalue()


def _test():
    """Simple test program to disassemble a file."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('infile', type=argparse.FileType(), nargs='?', default='-')
    args = parser.parse_args()
    with args.infile as infile:
        source = infile.read()
    code = compile(source, args.infile.name, "exec")
    dis(code)


if __name__ == "__main__":
    _test()