"""Disassembler of Python byte code into mnemonics."""

import sys
import types
import collections
import io

from opcode import *
from opcode import __all__ as _opcodes_all

__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
           "findlinestarts", "findlabels", "show_code",
           "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
del _opcodes_all

# Types of namespace members that dis() disassembles when it is handed an
# object with a __dict__ (a class or a module).
_have_code = (types.MethodType, types.FunctionType, types.CodeType,
              classmethod, staticmethod, type)

FORMAT_VALUE = opmap['FORMAT_VALUE']
# (converter, display name) pairs, selected by the low two bits of the
# FORMAT_VALUE oparg.
FORMAT_VALUE_CONVERTERS = (
    (None, ''),
    (str, 'str'),
    (repr, 'repr'),
    (ascii, 'ascii'),
)
MAKE_FUNCTION = opmap['MAKE_FUNCTION']
# Bit i of the MAKE_FUNCTION oparg corresponds to MAKE_FUNCTION_FLAGS[i].
MAKE_FUNCTION_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure')


def _try_compile(source, name):
    """Attempts to compile the given source, first as an expression and
       then as a statement if the first approach fails.

       Utility function to accept strings in functions that otherwise
       expect code objects

       *source* is the text to compile and *name* is the filename recorded
       in the resulting code object.  A SyntaxError raised by the second
       ('exec') attempt is propagated to the caller.
    """
    try:
        c = compile(source, name, 'eval')
    except SyntaxError:
        c = compile(source, name, 'exec')
    return c


def dis(x=None, *, file=None, depth=None):
    """Disassemble classes, methods, functions, and other compiled objects.

    With no argument, disassemble the last traceback.

    Compiled objects currently include generator objects, async generator
    objects, and coroutine objects, all of which store their code object
    in a special attribute.

    *file* is passed to print() for all output (None means sys.stdout).
    *depth* limits how deeply nested code objects are disassembled when *x*
    resolves to a code object or a source string; None means no limit.

    Raises TypeError if *x* is not something that can be disassembled.
    """
    if x is None:
        distb(file=file)
        return
    # Extract functions from methods.
    if hasattr(x, '__func__'):
        x = x.__func__
    # Extract compiled code objects from...
    if hasattr(x, '__code__'):  # ...a function, or
        x = x.__code__
    elif hasattr(x, 'gi_code'):  # ...a generator object, or
        x = x.gi_code
    elif hasattr(x, 'ag_code'):  # ...an asynchronous generator object, or
        x = x.ag_code
    elif hasattr(x, 'cr_code'):  # ...a coroutine.
        x = x.cr_code
    # Perform the disassembly.
    if hasattr(x, '__dict__'):  # Class or module
        # Sorted so the output order is stable across runs.
        items = sorted(x.__dict__.items())
        for name, x1 in items:
            if isinstance(x1, _have_code):
                print("Disassembly of %s:" % name, file=file)
                try:
                    dis(x1, file=file, depth=depth)
                except TypeError as msg:
                    # One undisassemblable member must not abort the rest.
                    print("Sorry:", msg, file=file)
                print(file=file)
    elif hasattr(x, 'co_code'):  # Code object
        _disassemble_recursive(x, file=file, depth=depth)
    elif isinstance(x, (bytes, bytearray)):  # Raw bytecode
        _disassemble_bytes(x, file=file)
    elif isinstance(x, str):  # Source code
        _disassemble_str(x, file=file, depth=depth)
    else:
        raise TypeError("don't know how to disassemble %s objects" %
                        type(x).__name__)


def distb(tb=None, *, file=None):
    """Disassemble a traceback (default: last traceback).

    When *tb* is omitted, sys.last_traceback is used and followed to its
    innermost entry; RuntimeError is raised if no traceback has been
    recorded.  An explicitly supplied *tb* is disassembled as given.
    """
    if tb is None:
        try:
            tb = sys.last_traceback
        except AttributeError:
            raise RuntimeError("no last traceback to disassemble") from None
        # Walk to the frame where the exception was actually raised.
        while tb.tb_next:
            tb = tb.tb_next
    disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file)

# The inspect module interrogates this dictionary to build its
# list of CO_* constants. It is also used by pretty_flags to
# turn the co_flags field into a human readable list.
COMPILER_FLAG_NAMES = {
    1: "OPTIMIZED",
    2: "NEWLOCALS",
    4: "VARARGS",
    8: "VARKEYWORDS",
    16: "NESTED",
    32: "GENERATOR",
    64: "NOFREE",
    128: "COROUTINE",
    256: "ITERABLE_COROUTINE",
    512: "ASYNC_GENERATOR",
}


def pretty_flags(flags):
    """Return pretty representation of code flags.

    Each set bit among the low 32 is rendered by name when it appears in
    COMPILER_FLAG_NAMES and as a hex literal otherwise.  Whatever is left
    after those 32 bits have been examined (including a *flags* of 0) is
    appended as a single hex literal.
    """
    names = []
    for i in range(32):
        flag = 1 << i
        if flags & flag:
            names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag)))
            flags ^= flag
            if not flags:
                break
    else:
        # Loop ran to completion: report the unconsumed remainder.
        names.append(hex(flags))
    return ", ".join(names)


def _get_code_object(x):
    """Helper to handle methods, compiled or raw code objects, and strings.

    Returns the code object behind *x*; raises TypeError when none can be
    found.
    """
    # Extract functions from methods.
    if hasattr(x, '__func__'):
        x = x.__func__
    # Extract compiled code objects from...
    if hasattr(x, '__code__'):  # ...a function, or
        x = x.__code__
    elif hasattr(x, 'gi_code'):  # ...a generator object, or
        x = x.gi_code
    elif hasattr(x, 'ag_code'):  # ...an asynchronous generator object, or
        x = x.ag_code
    elif hasattr(x, 'cr_code'):  # ...a coroutine.
        x = x.cr_code
    # Handle source code.
    if isinstance(x, str):
        x = _try_compile(x, "<disassembly>")
    # By now, if we don't have a code object, we can't disassemble x.
    if hasattr(x, 'co_code'):
        return x
    raise TypeError("don't know how to disassemble %s objects" %
                    type(x).__name__)


def code_info(x):
    """Formatted details of methods, functions, or code."""
    return _format_code_info(_get_code_object(x))


def _format_code_info(co):
    """Return a multi-line summary of the code object *co*.

    The summary lists the scalar attributes first, then one indexed section
    for each non-empty tuple of constants, names, local variable names,
    free variables and cell variables.
    """
    # NOTE(review): label text is kept exactly as found; confirm whether the
    # labels were meant to be padded so the values share a common column.
    lines = [
        "Name: %s" % co.co_name,
        "Filename: %s" % co.co_filename,
        "Argument count: %s" % co.co_argcount,
        "Positional-only arguments: %s" % co.co_posonlyargcount,
        "Kw-only arguments: %s" % co.co_kwonlyargcount,
        "Number of locals: %s" % co.co_nlocals,
        "Stack size: %s" % co.co_stacksize,
        "Flags: %s" % pretty_flags(co.co_flags),
    ]
    if co.co_consts:
        lines.append("Constants:")
        # Constants are shown with repr() so that e.g. strings keep quotes.
        lines.extend("%4d: %r" % i_c for i_c in enumerate(co.co_consts))
    name_sections = (
        ("Names:", co.co_names),
        ("Variable names:", co.co_varnames),
        ("Free variables:", co.co_freevars),
        ("Cell variables:", co.co_cellvars),
    )
    for title, values in name_sections:
        if values:
            lines.append(title)
            lines.extend("%4d: %s" % i_n for i_n in enumerate(values))
    return "\n".join(lines)


def show_code(co, *, file=None):
    """Print details of methods, functions, or code to *file*.

    If *file* is not provided, the output is printed on stdout.
    """
    print(code_info(co), file=file)


_Instruction = collections.namedtuple("_Instruction",
     "opname opcode arg argval argrepr offset starts_line is_jump_target")

_Instruction.opname.__doc__ = "Human readable name for operation"
_Instruction.opcode.__doc__ = "Numeric code for operation"
_Instruction.arg.__doc__ = "Numeric argument to operation (if any), otherwise None"
_Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
_Instruction.argrepr.__doc__ = "Human readable description of operation argument"
_Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
_Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None"
_Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False"

# Column widths used by Instruction._disassemble().
_OPNAME_WIDTH = 20
_OPARG_WIDTH = 5


class Instruction(_Instruction):
    """Details for a bytecode operation

       Defined fields:
         opname - human readable name for operation
         opcode - numeric code for operation
         arg - numeric argument to operation (if any), otherwise None
         argval - resolved arg value (if known), otherwise same as arg
         argrepr - human readable description of operation argument
         offset - start index of operation within bytecode sequence
         starts_line - line started by this opcode (if any), otherwise None
         is_jump_target - True if other code jumps to here, otherwise False
    """

    def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
        """Format instruction details for inclusion in disassembly output

        *lineno_width* sets the width of the line number field (0 omits it)
        *mark_as_current* inserts a '-->' marker arrow as part of the line
        *offset_width* sets the width of the instruction offset field

        Returns the formatted line with trailing whitespace removed.
        """
        fields = []
        # Column: Source code line number
        if lineno_width:
            if self.starts_line is not None:
                lineno_fmt = "%%%dd" % lineno_width
                fields.append(lineno_fmt % self.starts_line)
            else:
                fields.append(' ' * lineno_width)
        # Column: Current instruction indicator
        if mark_as_current:
            fields.append('-->')
        else:
            # Same width as '-->' so the following columns stay aligned.
            fields.append('   ')
        # Column: Jump target marker
        if self.is_jump_target:
            fields.append('>>')
        else:
            # Same width as '>>' so the following columns stay aligned.
            fields.append('  ')
        # Column: Instruction offset from start of code sequence
        fields.append(repr(self.offset).rjust(offset_width))
        # Column: Opcode name
        fields.append(self.opname.ljust(_OPNAME_WIDTH))
        # Column: Opcode argument
        if self.arg is not None:
            fields.append(repr(self.arg).rjust(_OPARG_WIDTH))
            # Column: Opcode argument details
            if self.argrepr:
                fields.append('(' + self.argrepr + ')')
        return ' '.join(fields).rstrip()


def get_instructions(x, *, first_line=None):
    """Iterator for the opcodes in methods, functions or code

    Generates a series of Instruction named tuples giving the details of
    each operation in the supplied code.

    If *first_line* is not None, it indicates the line number that should
    be reported for the first source line in the disassembled code.
    Otherwise, the source line information (if any) is taken directly from
    the disassembled code object.
    """
    co = _get_code_object(x)
    cell_names = co.co_cellvars + co.co_freevars
    linestarts = dict(findlinestarts(co))
    if first_line is not None:
        line_offset = first_line - co.co_firstlineno
    else:
        line_offset = 0
    return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
                                   co.co_consts, cell_names, linestarts,
                                   line_offset)


def _get_const_info(const_index, const_list):
    """Helper to get optional details about const references

       Returns the dereferenced constant and its repr if the constant
       list is defined.
       Otherwise returns the constant index and its repr().
    """
    argval = const_index
    if const_list is not None:
        argval = const_list[const_index]
    return argval, repr(argval)


def _get_name_info(name_index, name_list):
    """Helper to get optional details about named references

       Returns the dereferenced name as both value and repr if the name
       list is defined.
       Otherwise returns the name index and its repr().
    """
    argval = name_index
    if name_list is not None:
        argval = name_list[name_index]
        argrepr = argval
    else:
        argrepr = repr(argval)
    return argval, argrepr


def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                            cells=None, linestarts=None, line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.  Additional information about the code's runtime environment
    (e.g. variable names, constants) can be specified using optional
    arguments.

    *linestarts* maps instruction offsets to line numbers; *line_offset* is
    added to every line number that is reported.
    """
    # A set keeps the per-instruction membership test below constant-time.
    labels = set(findlabels(code))
    starts_line = None
    for offset, op, arg in _unpack_opargs(code):
        if linestarts is not None:
            starts_line = linestarts.get(offset, None)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = offset in labels
        argval = None
        argrepr = ''
        if arg is not None:
            #  Set argval to the dereferenced value of the argument when
            #  available, and argrepr to the string representation of argval.
            #    _disassemble_bytes needs the string repr of the
            #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
            if op in hasconst:
                argval, argrepr = _get_const_info(arg, constants)
            elif op in hasname:
                argval, argrepr = _get_name_info(arg, names)
            elif op in hasjabs:
                # Jump args are scaled by 2 to obtain a byte offset.
                argval = arg*2
                argrepr = "to " + repr(argval)
            elif op in hasjrel:
                # Relative to the offset of the following instruction.
                argval = offset + 2 + arg*2
                argrepr = "to " + repr(argval)
            elif op in haslocal:
                argval, argrepr = _get_name_info(arg, varnames)
            elif op in hascompare:
                argval = cmp_op[arg]
                argrepr = argval
            elif op in hasfree:
                argval, argrepr = _get_name_info(arg, cells)
            elif op == FORMAT_VALUE:
                # Low two bits pick the converter; bit 2 flags a format spec.
                argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x3]
                argval = (argval, bool(arg & 0x4))
                if argval[1]:
                    if argrepr:
                        argrepr += ', '
                    argrepr += 'with format'
            elif op == MAKE_FUNCTION:
                argrepr = ', '.join(s for i, s in enumerate(MAKE_FUNCTION_FLAGS)
                                    if arg & (1<<i))
        yield Instruction(opname[op], op,
                          arg, argval, argrepr,
                          offset, starts_line, is_jump_target)


def disassemble(co, lasti=-1, *, file=None):
    """Disassemble a code object.

    The instruction whose offset equals *lasti* is marked as current; the
    default of -1 matches no instruction.  Output is printed to *file*.
    """
    cell_names = co.co_cellvars + co.co_freevars
    linestarts = dict(findlinestarts(co))
    _disassemble_bytes(co.co_code, lasti, co.co_varnames, co.co_names,
                       co.co_consts, cell_names, linestarts, file=file)


def _disassemble_recursive(co, *, file=None, depth=None):
    """Disassemble *co*, then the code objects found among its constants.

    *depth* is the number of nesting levels still to descend into; None
    means no limit and 0 disassembles only *co* itself.
    """
    disassemble(co, file=file)
    if depth is None or depth > 0:
        if depth is not None:
            depth = depth - 1
        for x in co.co_consts:
            if hasattr(x, 'co_code'):
                print(file=file)
                print("Disassembly of %r:" % (x,), file=file)
                _disassemble_recursive(x, file=file, depth=depth)


def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
                       constants=None, cells=None, linestarts=None,
                       *, file=None, line_offset=0):
    """Print a disassembly of the raw bytecode *code* to *file*.

    The optional name/constant tables and *linestarts* are forwarded to
    _get_instructions_bytes().  The instruction at offset *lasti* is marked
    as the current one.
    """
    # Omit the line number column entirely if we have no line number info
    show_lineno = bool(linestarts)
    if show_lineno:
        maxlineno = max(linestarts.values()) + line_offset
        if maxlineno >= 1000:
            lineno_width = len(str(maxlineno))
        else:
            lineno_width = 3
    else:
        lineno_width = 0
    # Offset of the last instruction; widen the column only when needed.
    maxoffset = len(code) - 2
    if maxoffset >= 10000:
        offset_width = len(str(maxoffset))
    else:
        offset_width = 4
    for instr in _get_instructions_bytes(code, varnames, names,
                                         constants, cells, linestarts,
                                         line_offset=line_offset):
        # Separate source lines with a blank line, except before the first.
        new_source_line = (show_lineno and
                           instr.starts_line is not None and
                           instr.offset > 0)
        if new_source_line:
            print(file=file)
        is_current_instr = instr.offset == lasti
        print(instr._disassemble(lineno_width, is_current_instr, offset_width),
              file=file)


def _disassemble_str(source, **kwargs):
    """Compile the source string, then disassemble the code object."""
    _disassemble_recursive(_try_compile(source, '<dis>'), **kwargs)


disco = disassemble                     # XXX For backwards compatibility


def _unpack_opargs(code):
    """Yield (offset, opcode, arg) for each 2-byte instruction in *code*.

    *arg* is None for opcodes below HAVE_ARGUMENT.  The argument of an
    EXTENDED_ARG instruction is shifted left by 8 bits and OR-ed into the
    argument of the instruction that follows it.
    """
    extended_arg = 0
    for i in range(0, len(code), 2):
        op = code[i]
        if op >= HAVE_ARGUMENT:
            arg = code[i+1] | extended_arg
            extended_arg = (arg << 8) if op == EXTENDED_ARG else 0
        else:
            arg = None
            extended_arg = 0
        yield (i, op, arg)


def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.

    Return the list of offsets, without duplicates, in the order in which
    the jumps referring to them are encountered.
    """
    labels = []
    seen = set()    # mirrors *labels* for constant-time duplicate checks
    for offset, op, arg in _unpack_opargs(code):
        if arg is None:
            continue
        if op in hasjrel:
            label = offset + 2 + arg*2
        elif op in hasjabs:
            label = arg*2
        else:
            continue
        if label not in seen:
            seen.add(label)
            labels.append(label)
    return labels


def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.

    Generate pairs (offset, lineno)

    Entries of code.co_lines() whose line is None, or whose line equals the
    line most recently yielded, produce no pair.
    """
    lastline = None
    for start, end, line in code.co_lines():
        if line is not None and line != lastline:
            lastline = line
            yield start, line


class Bytecode:
    """The bytecode operations of a piece of code

    Instantiate this with a function, method, other compiled object, string of
    code, or a code object (as returned by compile()).

    Iterating over this yields the bytecode operations as Instruction instances.
    """
    def __init__(self, x, *, first_line=None, current_offset=None):
        """Wrap the code object behind *x*.

        *first_line* overrides the line number reported for the first
        source line; *current_offset* is the instruction offset that dis()
        marks as current.
        """
        self.codeobj = co = _get_code_object(x)
        if first_line is None:
            self.first_line = co.co_firstlineno
            self._line_offset = 0
        else:
            self.first_line = first_line
            self._line_offset = first_line - co.co_firstlineno
        self._cell_names = co.co_cellvars + co.co_freevars
        self._linestarts = dict(findlinestarts(co))
        # Kept only so that __repr__ can show what the caller passed in.
        self._original_object = x
        self.current_offset = current_offset

    def __iter__(self):
        co = self.codeobj
        return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
                                       co.co_consts, self._cell_names,
                                       self._linestarts,
                                       line_offset=self._line_offset)

    def __repr__(self):
        return "{}({!r})".format(self.__class__.__name__,
                                 self._original_object)

    @classmethod
    def from_traceback(cls, tb):
        """ Construct a Bytecode from the given traceback """
        # Use the innermost traceback entry.
        while tb.tb_next:
            tb = tb.tb_next
        return cls(tb.tb_frame.f_code, current_offset=tb.tb_lasti)

    def info(self):
        """Return formatted information about the code object."""
        return _format_code_info(self.codeobj)

    def dis(self):
        """Return a formatted view of the bytecode operations."""
        co = self.codeobj
        if self.current_offset is not None:
            offset = self.current_offset
        else:
            offset = -1
        with io.StringIO() as output:
            _disassemble_bytes(co.co_code, varnames=co.co_varnames,
                               names=co.co_names, constants=co.co_consts,
                               cells=self._cell_names,
                               linestarts=self._linestarts,
                               line_offset=self._line_offset,
                               file=output,
                               lasti=offset)
            return output.getvalue()


def _test():
    """Simple test program to disassemble a file."""
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('infile', type=argparse.FileType('rb'), nargs='?', default='-')
    args = parser.parse_args()
    with args.infile as infile:
        source = infile.read()
    code = compile(source, args.infile.name, "exec")
    dis(code)


if __name__ == "__main__":
    _test()