1#!/usr/bin/python 2''' 3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb 4to be extended with Python code e.g. for library-specific data visualizations, 5such as for the C++ STL types. Documentation on this API can be seen at: 6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html 7 8 9This python module deals with the case when the process being debugged (the 10"inferior process" in gdb parlance) is itself python, or more specifically, 11linked against libpython. In this situation, almost every item of data is a 12(PyObject*), and having the debugger merely print their addresses is not very 13enlightening. 14 15This module embeds knowledge about the implementation details of libpython so 16that we can emit useful visualizations e.g. a string, a list, a dict, a frame 17giving file/line information and the state of local variables 18 19In particular, given a gdb.Value corresponding to a PyObject* in the inferior 20process, we can generate a "proxy value" within the gdb process. For example, 21given a PyObject* in the inferior process that is in fact a PyListObject* 22holding three PyObject* that turn out to be PyBytesObject* instances, we can 23generate a proxy value within the gdb process that is a list of bytes 24instances: 25 [b"foo", b"bar", b"baz"] 26 27Doing so can be expensive for complicated graphs of objects, and could take 28some time, so we also have a "write_repr" method that writes a representation 29of the data to a file-like object. This allows us to stop the traversal by 30having the file-like object raise an exception if it gets too much data. 31 32With both "proxyval" and "write_repr" we keep track of the set of all addresses 33visited so far in the traversal, to avoid infinite recursion due to cycles in 34the graph of object references. 
35 36We try to defer gdb.lookup_type() invocations for python types until as late as 37possible: for a dynamically linked python binary, when the process starts in 38the debugger, the libpython.so hasn't been dynamically loaded yet, so none of 39the type names are known to the debugger 40 41The module also extends gdb with some python-specific commands. 42''' 43 44# NOTE: some gdbs are linked with Python 3, so this file should be dual-syntax 45# compatible (2.6+ and 3.0+). See #19308. 46 47from __future__ import print_function 48import gdb 49import os 50import locale 51import sys 52 53if sys.version_info[0] >= 3: 54 unichr = chr 55 xrange = range 56 long = int 57 58# Look up the gdb.Type for some standard types: 59# Those need to be refreshed as types (pointer sizes) may change when 60# gdb loads different executables 61 62def _type_char_ptr(): 63 return gdb.lookup_type('char').pointer() # char* 64 65 66def _type_unsigned_char_ptr(): 67 return gdb.lookup_type('unsigned char').pointer() # unsigned char* 68 69 70def _type_unsigned_short_ptr(): 71 return gdb.lookup_type('unsigned short').pointer() 72 73 74def _type_unsigned_int_ptr(): 75 return gdb.lookup_type('unsigned int').pointer() 76 77 78def _sizeof_void_p(): 79 return gdb.lookup_type('void').pointer().sizeof 80 81 82# value computed later, see PyUnicodeObjectPtr.proxy() 83_is_pep393 = None 84 85Py_TPFLAGS_HEAPTYPE = (1 << 9) 86Py_TPFLAGS_LONG_SUBCLASS = (1 << 24) 87Py_TPFLAGS_LIST_SUBCLASS = (1 << 25) 88Py_TPFLAGS_TUPLE_SUBCLASS = (1 << 26) 89Py_TPFLAGS_BYTES_SUBCLASS = (1 << 27) 90Py_TPFLAGS_UNICODE_SUBCLASS = (1 << 28) 91Py_TPFLAGS_DICT_SUBCLASS = (1 << 29) 92Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30) 93Py_TPFLAGS_TYPE_SUBCLASS = (1 << 31) 94 95 96MAX_OUTPUT_LEN=1024 97 98hexdigits = "0123456789abcdef" 99 100ENCODING = locale.getpreferredencoding() 101 102FRAME_INFO_OPTIMIZED_OUT = '(frame information optimized out)' 103UNABLE_READ_INFO_PYTHON_FRAME = 'Unable to read information on python frame' 104EVALFRAME = 
'_PyEval_EvalFrameDefault' 105 106class NullPyObjectPtr(RuntimeError): 107 pass 108 109 110def safety_limit(val): 111 # Given an integer value from the process being debugged, limit it to some 112 # safety threshold so that arbitrary breakage within said process doesn't 113 # break the gdb process too much (e.g. sizes of iterations, sizes of lists) 114 return min(val, 1000) 115 116 117def safe_range(val): 118 # As per range, but don't trust the value too much: cap it to a safety 119 # threshold in case the data was corrupted 120 return xrange(safety_limit(int(val))) 121 122if sys.version_info[0] >= 3: 123 def write_unicode(file, text): 124 file.write(text) 125else: 126 def write_unicode(file, text): 127 # Write a byte or unicode string to file. Unicode strings are encoded to 128 # ENCODING encoding with 'backslashreplace' error handler to avoid 129 # UnicodeEncodeError. 130 if isinstance(text, unicode): 131 text = text.encode(ENCODING, 'backslashreplace') 132 file.write(text) 133 134try: 135 os_fsencode = os.fsencode 136except AttributeError: 137 def os_fsencode(filename): 138 if not isinstance(filename, unicode): 139 return filename 140 encoding = sys.getfilesystemencoding() 141 if encoding == 'mbcs': 142 # mbcs doesn't support surrogateescape 143 return filename.encode(encoding) 144 encoded = [] 145 for char in filename: 146 # surrogateescape error handler 147 if 0xDC80 <= ord(char) <= 0xDCFF: 148 byte = chr(ord(char) - 0xDC00) 149 else: 150 byte = char.encode(encoding) 151 encoded.append(byte) 152 return ''.join(encoded) 153 154class StringTruncated(RuntimeError): 155 pass 156 157class TruncatedStringIO(object): 158 '''Similar to io.StringIO, but can truncate the output by raising a 159 StringTruncated exception''' 160 def __init__(self, maxlen=None): 161 self._val = '' 162 self.maxlen = maxlen 163 164 def write(self, data): 165 if self.maxlen: 166 if len(data) + len(self._val) > self.maxlen: 167 # Truncation: 168 self._val += data[0:self.maxlen - 
len(self._val)] 169 raise StringTruncated() 170 171 self._val += data 172 173 def getvalue(self): 174 return self._val 175 176class PyObjectPtr(object): 177 """ 178 Class wrapping a gdb.Value that's either a (PyObject*) within the 179 inferior process, or some subclass pointer e.g. (PyBytesObject*) 180 181 There will be a subclass for every refined PyObject type that we care 182 about. 183 184 Note that at every stage the underlying pointer could be NULL, point 185 to corrupt data, etc; this is the debugger, after all. 186 """ 187 _typename = 'PyObject' 188 189 def __init__(self, gdbval, cast_to=None): 190 if cast_to: 191 self._gdbval = gdbval.cast(cast_to) 192 else: 193 self._gdbval = gdbval 194 195 def field(self, name): 196 ''' 197 Get the gdb.Value for the given field within the PyObject, coping with 198 some python 2 versus python 3 differences. 199 200 Various libpython types are defined using the "PyObject_HEAD" and 201 "PyObject_VAR_HEAD" macros. 202 203 In Python 2, this these are defined so that "ob_type" and (for a var 204 object) "ob_size" are fields of the type in question. 205 206 In Python 3, this is defined as an embedded PyVarObject type thus: 207 PyVarObject ob_base; 208 so that the "ob_size" field is located insize the "ob_base" field, and 209 the "ob_type" is most easily accessed by casting back to a (PyObject*). 210 ''' 211 if self.is_null(): 212 raise NullPyObjectPtr(self) 213 214 if name == 'ob_type': 215 pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type()) 216 return pyo_ptr.dereference()[name] 217 218 if name == 'ob_size': 219 pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type()) 220 return pyo_ptr.dereference()[name] 221 222 # General case: look it up inside the object: 223 return self._gdbval.dereference()[name] 224 225 def pyop_field(self, name): 226 ''' 227 Get a PyObjectPtr for the given PyObject* field within this PyObject, 228 coping with some python 2 versus python 3 differences. 
229 ''' 230 return PyObjectPtr.from_pyobject_ptr(self.field(name)) 231 232 def write_field_repr(self, name, out, visited): 233 ''' 234 Extract the PyObject* field named "name", and write its representation 235 to file-like object "out" 236 ''' 237 field_obj = self.pyop_field(name) 238 field_obj.write_repr(out, visited) 239 240 def get_truncated_repr(self, maxlen): 241 ''' 242 Get a repr-like string for the data, but truncate it at "maxlen" bytes 243 (ending the object graph traversal as soon as you do) 244 ''' 245 out = TruncatedStringIO(maxlen) 246 try: 247 self.write_repr(out, set()) 248 except StringTruncated: 249 # Truncation occurred: 250 return out.getvalue() + '...(truncated)' 251 252 # No truncation occurred: 253 return out.getvalue() 254 255 def type(self): 256 return PyTypeObjectPtr(self.field('ob_type')) 257 258 def is_null(self): 259 return 0 == long(self._gdbval) 260 261 def is_optimized_out(self): 262 ''' 263 Is the value of the underlying PyObject* visible to the debugger? 264 265 This can vary with the precise version of the compiler used to build 266 Python, and the precise version of gdb. 267 268 See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with 269 PyEval_EvalFrameEx's "f" 270 ''' 271 return self._gdbval.is_optimized_out 272 273 def safe_tp_name(self): 274 try: 275 ob_type = self.type() 276 tp_name = ob_type.field('tp_name') 277 return tp_name.string() 278 # NullPyObjectPtr: NULL tp_name? 279 # RuntimeError: Can't even read the object at all? 280 # UnicodeDecodeError: Failed to decode tp_name bytestring 281 except (NullPyObjectPtr, RuntimeError, UnicodeDecodeError): 282 return 'unknown' 283 284 def proxyval(self, visited): 285 ''' 286 Scrape a value from the inferior process, and try to represent it 287 within the gdb process, whilst (hopefully) avoiding crashes when 288 the remote data is corrupt. 289 290 Derived classes will override this. 
291 292 For example, a PyIntObject* with ob_ival 42 in the inferior process 293 should result in an int(42) in this process. 294 295 visited: a set of all gdb.Value pyobject pointers already visited 296 whilst generating this value (to guard against infinite recursion when 297 visiting object graphs with loops). Analogous to Py_ReprEnter and 298 Py_ReprLeave 299 ''' 300 301 class FakeRepr(object): 302 """ 303 Class representing a non-descript PyObject* value in the inferior 304 process for when we don't have a custom scraper, intended to have 305 a sane repr(). 306 """ 307 308 def __init__(self, tp_name, address): 309 self.tp_name = tp_name 310 self.address = address 311 312 def __repr__(self): 313 # For the NULL pointer, we have no way of knowing a type, so 314 # special-case it as per 315 # http://bugs.python.org/issue8032#msg100882 316 if self.address == 0: 317 return '0x0' 318 return '<%s at remote 0x%x>' % (self.tp_name, self.address) 319 320 return FakeRepr(self.safe_tp_name(), 321 long(self._gdbval)) 322 323 def write_repr(self, out, visited): 324 ''' 325 Write a string representation of the value scraped from the inferior 326 process to "out", a file-like object. 327 ''' 328 # Default implementation: generate a proxy value and write its repr 329 # However, this could involve a lot of work for complicated objects, 330 # so for derived classes we specialize this 331 return out.write(repr(self.proxyval(visited))) 332 333 @classmethod 334 def subclass_from_type(cls, t): 335 ''' 336 Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a 337 (PyTypeObject*), determine the corresponding subclass of PyObjectPtr 338 to use 339 340 Ideally, we would look up the symbols for the global types, but that 341 isn't working yet: 342 (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value 343 Traceback (most recent call last): 344 File "<string>", line 1, in <module> 345 NotImplementedError: Symbol type not yet supported in Python scripts. 
346 Error while executing Python code. 347 348 For now, we use tp_flags, after doing some string comparisons on the 349 tp_name for some special-cases that don't seem to be visible through 350 flags 351 ''' 352 try: 353 tp_name = t.field('tp_name').string() 354 tp_flags = int(t.field('tp_flags')) 355 # RuntimeError: NULL pointers 356 # UnicodeDecodeError: string() fails to decode the bytestring 357 except (RuntimeError, UnicodeDecodeError): 358 # Handle any kind of error e.g. NULL ptrs by simply using the base 359 # class 360 return cls 361 362 #print('tp_flags = 0x%08x' % tp_flags) 363 #print('tp_name = %r' % tp_name) 364 365 name_map = {'bool': PyBoolObjectPtr, 366 'classobj': PyClassObjectPtr, 367 'NoneType': PyNoneStructPtr, 368 'frame': PyFrameObjectPtr, 369 'set' : PySetObjectPtr, 370 'frozenset' : PySetObjectPtr, 371 'builtin_function_or_method' : PyCFunctionObjectPtr, 372 'method-wrapper': wrapperobject, 373 } 374 if tp_name in name_map: 375 return name_map[tp_name] 376 377 if tp_flags & Py_TPFLAGS_HEAPTYPE: 378 return HeapTypeObjectPtr 379 380 if tp_flags & Py_TPFLAGS_LONG_SUBCLASS: 381 return PyLongObjectPtr 382 if tp_flags & Py_TPFLAGS_LIST_SUBCLASS: 383 return PyListObjectPtr 384 if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS: 385 return PyTupleObjectPtr 386 if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS: 387 return PyBytesObjectPtr 388 if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS: 389 return PyUnicodeObjectPtr 390 if tp_flags & Py_TPFLAGS_DICT_SUBCLASS: 391 return PyDictObjectPtr 392 if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS: 393 return PyBaseExceptionObjectPtr 394 #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS: 395 # return PyTypeObjectPtr 396 397 # Use the base class: 398 return cls 399 400 @classmethod 401 def from_pyobject_ptr(cls, gdbval): 402 ''' 403 Try to locate the appropriate derived class dynamically, and cast 404 the pointer accordingly. 
405 ''' 406 try: 407 p = PyObjectPtr(gdbval) 408 cls = cls.subclass_from_type(p.type()) 409 return cls(gdbval, cast_to=cls.get_gdb_type()) 410 except RuntimeError: 411 # Handle any kind of error e.g. NULL ptrs by simply using the base 412 # class 413 pass 414 return cls(gdbval) 415 416 @classmethod 417 def get_gdb_type(cls): 418 return gdb.lookup_type(cls._typename).pointer() 419 420 def as_address(self): 421 return long(self._gdbval) 422 423class PyVarObjectPtr(PyObjectPtr): 424 _typename = 'PyVarObject' 425 426class ProxyAlreadyVisited(object): 427 ''' 428 Placeholder proxy to use when protecting against infinite recursion due to 429 loops in the object graph. 430 431 Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave 432 ''' 433 def __init__(self, rep): 434 self._rep = rep 435 436 def __repr__(self): 437 return self._rep 438 439 440def _write_instance_repr(out, visited, name, pyop_attrdict, address): 441 '''Shared code for use by all classes: 442 write a representation to file-like object "out"''' 443 out.write('<') 444 out.write(name) 445 446 # Write dictionary of instance attributes: 447 if isinstance(pyop_attrdict, PyDictObjectPtr): 448 out.write('(') 449 first = True 450 for pyop_arg, pyop_val in pyop_attrdict.iteritems(): 451 if not first: 452 out.write(', ') 453 first = False 454 out.write(pyop_arg.proxyval(visited)) 455 out.write('=') 456 pyop_val.write_repr(out, visited) 457 out.write(')') 458 out.write(' at remote 0x%x>' % address) 459 460 461class InstanceProxy(object): 462 463 def __init__(self, cl_name, attrdict, address): 464 self.cl_name = cl_name 465 self.attrdict = attrdict 466 self.address = address 467 468 def __repr__(self): 469 if isinstance(self.attrdict, dict): 470 kwargs = ', '.join(["%s=%r" % (arg, val) 471 for arg, val in self.attrdict.items()]) 472 return '<%s(%s) at remote 0x%x>' % (self.cl_name, 473 kwargs, self.address) 474 else: 475 return '<%s at remote 0x%x>' % (self.cl_name, 476 self.address) 477 478def 
_PyObject_VAR_SIZE(typeobj, nitems): 479 if _PyObject_VAR_SIZE._type_size_t is None: 480 _PyObject_VAR_SIZE._type_size_t = gdb.lookup_type('size_t') 481 482 return ( ( typeobj.field('tp_basicsize') + 483 nitems * typeobj.field('tp_itemsize') + 484 (_sizeof_void_p() - 1) 485 ) & ~(_sizeof_void_p() - 1) 486 ).cast(_PyObject_VAR_SIZE._type_size_t) 487_PyObject_VAR_SIZE._type_size_t = None 488 489class HeapTypeObjectPtr(PyObjectPtr): 490 _typename = 'PyObject' 491 492 def get_attr_dict(self): 493 ''' 494 Get the PyDictObject ptr representing the attribute dictionary 495 (or None if there's a problem) 496 ''' 497 try: 498 typeobj = self.type() 499 dictoffset = int_from_int(typeobj.field('tp_dictoffset')) 500 if dictoffset != 0: 501 if dictoffset < 0: 502 type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer() 503 tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size']) 504 if tsize < 0: 505 tsize = -tsize 506 size = _PyObject_VAR_SIZE(typeobj, tsize) 507 dictoffset += size 508 assert dictoffset > 0 509 assert dictoffset % _sizeof_void_p() == 0 510 511 dictptr = self._gdbval.cast(_type_char_ptr()) + dictoffset 512 PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer() 513 dictptr = dictptr.cast(PyObjectPtrPtr) 514 return PyObjectPtr.from_pyobject_ptr(dictptr.dereference()) 515 except RuntimeError: 516 # Corrupt data somewhere; fail safe 517 pass 518 519 # Not found, or some kind of error: 520 return None 521 522 def proxyval(self, visited): 523 ''' 524 Support for classes. 
525 526 Currently we just locate the dictionary using a transliteration to 527 python of _PyObject_GetDictPtr, ignoring descriptors 528 ''' 529 # Guard against infinite loops: 530 if self.as_address() in visited: 531 return ProxyAlreadyVisited('<...>') 532 visited.add(self.as_address()) 533 534 pyop_attr_dict = self.get_attr_dict() 535 if pyop_attr_dict: 536 attr_dict = pyop_attr_dict.proxyval(visited) 537 else: 538 attr_dict = {} 539 tp_name = self.safe_tp_name() 540 541 # Class: 542 return InstanceProxy(tp_name, attr_dict, long(self._gdbval)) 543 544 def write_repr(self, out, visited): 545 # Guard against infinite loops: 546 if self.as_address() in visited: 547 out.write('<...>') 548 return 549 visited.add(self.as_address()) 550 551 pyop_attrdict = self.get_attr_dict() 552 _write_instance_repr(out, visited, 553 self.safe_tp_name(), pyop_attrdict, self.as_address()) 554 555class ProxyException(Exception): 556 def __init__(self, tp_name, args): 557 self.tp_name = tp_name 558 self.args = args 559 560 def __repr__(self): 561 return '%s%r' % (self.tp_name, self.args) 562 563class PyBaseExceptionObjectPtr(PyObjectPtr): 564 """ 565 Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception 566 within the process being debugged. 567 """ 568 _typename = 'PyBaseExceptionObject' 569 570 def proxyval(self, visited): 571 # Guard against infinite loops: 572 if self.as_address() in visited: 573 return ProxyAlreadyVisited('(...)') 574 visited.add(self.as_address()) 575 arg_proxy = self.pyop_field('args').proxyval(visited) 576 return ProxyException(self.safe_tp_name(), 577 arg_proxy) 578 579 def write_repr(self, out, visited): 580 # Guard against infinite loops: 581 if self.as_address() in visited: 582 out.write('(...)') 583 return 584 visited.add(self.as_address()) 585 586 out.write(self.safe_tp_name()) 587 self.write_field_repr('args', out, visited) 588 589class PyClassObjectPtr(PyObjectPtr): 590 """ 591 Class wrapping a gdb.Value that's a PyClassObject* i.e. 
a <classobj> 592 instance within the process being debugged. 593 """ 594 _typename = 'PyClassObject' 595 596 597class BuiltInFunctionProxy(object): 598 def __init__(self, ml_name): 599 self.ml_name = ml_name 600 601 def __repr__(self): 602 return "<built-in function %s>" % self.ml_name 603 604class BuiltInMethodProxy(object): 605 def __init__(self, ml_name, pyop_m_self): 606 self.ml_name = ml_name 607 self.pyop_m_self = pyop_m_self 608 609 def __repr__(self): 610 return ('<built-in method %s of %s object at remote 0x%x>' 611 % (self.ml_name, 612 self.pyop_m_self.safe_tp_name(), 613 self.pyop_m_self.as_address()) 614 ) 615 616class PyCFunctionObjectPtr(PyObjectPtr): 617 """ 618 Class wrapping a gdb.Value that's a PyCFunctionObject* 619 (see Include/methodobject.h and Objects/methodobject.c) 620 """ 621 _typename = 'PyCFunctionObject' 622 623 def proxyval(self, visited): 624 m_ml = self.field('m_ml') # m_ml is a (PyMethodDef*) 625 try: 626 ml_name = m_ml['ml_name'].string() 627 except UnicodeDecodeError: 628 ml_name = '<ml_name:UnicodeDecodeError>' 629 630 pyop_m_self = self.pyop_field('m_self') 631 if pyop_m_self.is_null(): 632 return BuiltInFunctionProxy(ml_name) 633 else: 634 return BuiltInMethodProxy(ml_name, pyop_m_self) 635 636 637class PyCodeObjectPtr(PyObjectPtr): 638 """ 639 Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance 640 within the process being debugged. 
641 """ 642 _typename = 'PyCodeObject' 643 644 def addr2line(self, addrq): 645 ''' 646 Get the line number for a given bytecode offset 647 648 Analogous to PyCode_Addr2Line; translated from pseudocode in 649 Objects/lnotab_notes.txt 650 ''' 651 co_linetable = self.pyop_field('co_linetable').proxyval(set()) 652 653 # Initialize lineno to co_firstlineno as per PyCode_Addr2Line 654 # not 0, as lnotab_notes.txt has it: 655 lineno = int_from_int(self.field('co_firstlineno')) 656 657 if addrq < 0: 658 return lineno 659 addr = 0 660 for addr_incr, line_incr in zip(co_linetable[::2], co_linetable[1::2]): 661 if addr_incr == 255: 662 break 663 addr += ord(addr_incr) 664 line_delta = ord(line_incr) 665 if line_delta == 128: 666 line_delta = 0 667 elif line_delta > 128: 668 line_delta -= 256 669 lineno += line_delta 670 if addr > addrq: 671 return lineno 672 assert False, "Unreachable" 673 674 675class PyDictObjectPtr(PyObjectPtr): 676 """ 677 Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance 678 within the process being debugged. 
679 """ 680 _typename = 'PyDictObject' 681 682 def iteritems(self): 683 ''' 684 Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs, 685 analogous to dict.iteritems() 686 ''' 687 keys = self.field('ma_keys') 688 values = self.field('ma_values') 689 entries, nentries = self._get_entries(keys) 690 for i in safe_range(nentries): 691 ep = entries[i] 692 if long(values): 693 pyop_value = PyObjectPtr.from_pyobject_ptr(values[i]) 694 else: 695 pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value']) 696 if not pyop_value.is_null(): 697 pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key']) 698 yield (pyop_key, pyop_value) 699 700 def proxyval(self, visited): 701 # Guard against infinite loops: 702 if self.as_address() in visited: 703 return ProxyAlreadyVisited('{...}') 704 visited.add(self.as_address()) 705 706 result = {} 707 for pyop_key, pyop_value in self.iteritems(): 708 proxy_key = pyop_key.proxyval(visited) 709 proxy_value = pyop_value.proxyval(visited) 710 result[proxy_key] = proxy_value 711 return result 712 713 def write_repr(self, out, visited): 714 # Guard against infinite loops: 715 if self.as_address() in visited: 716 out.write('{...}') 717 return 718 visited.add(self.as_address()) 719 720 out.write('{') 721 first = True 722 for pyop_key, pyop_value in self.iteritems(): 723 if not first: 724 out.write(', ') 725 first = False 726 pyop_key.write_repr(out, visited) 727 out.write(': ') 728 pyop_value.write_repr(out, visited) 729 out.write('}') 730 731 def _get_entries(self, keys): 732 dk_nentries = int(keys['dk_nentries']) 733 dk_size = int(keys['dk_size']) 734 try: 735 # <= Python 3.5 736 return keys['dk_entries'], dk_size 737 except RuntimeError: 738 # >= Python 3.6 739 pass 740 741 if dk_size <= 0xFF: 742 offset = dk_size 743 elif dk_size <= 0xFFFF: 744 offset = 2 * dk_size 745 elif dk_size <= 0xFFFFFFFF: 746 offset = 4 * dk_size 747 else: 748 offset = 8 * dk_size 749 750 ent_addr = keys['dk_indices'].address 751 ent_addr = 
ent_addr.cast(_type_unsigned_char_ptr()) + offset 752 ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer() 753 ent_addr = ent_addr.cast(ent_ptr_t) 754 755 return ent_addr, dk_nentries 756 757 758class PyListObjectPtr(PyObjectPtr): 759 _typename = 'PyListObject' 760 761 def __getitem__(self, i): 762 # Get the gdb.Value for the (PyObject*) with the given index: 763 field_ob_item = self.field('ob_item') 764 return field_ob_item[i] 765 766 def proxyval(self, visited): 767 # Guard against infinite loops: 768 if self.as_address() in visited: 769 return ProxyAlreadyVisited('[...]') 770 visited.add(self.as_address()) 771 772 result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) 773 for i in safe_range(int_from_int(self.field('ob_size')))] 774 return result 775 776 def write_repr(self, out, visited): 777 # Guard against infinite loops: 778 if self.as_address() in visited: 779 out.write('[...]') 780 return 781 visited.add(self.as_address()) 782 783 out.write('[') 784 for i in safe_range(int_from_int(self.field('ob_size'))): 785 if i > 0: 786 out.write(', ') 787 element = PyObjectPtr.from_pyobject_ptr(self[i]) 788 element.write_repr(out, visited) 789 out.write(']') 790 791class PyLongObjectPtr(PyObjectPtr): 792 _typename = 'PyLongObject' 793 794 def proxyval(self, visited): 795 ''' 796 Python's Include/longobjrep.h has this declaration: 797 struct _longobject { 798 PyObject_VAR_HEAD 799 digit ob_digit[1]; 800 }; 801 802 with this description: 803 The absolute value of a number is equal to 804 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i) 805 Negative numbers are represented with ob_size < 0; 806 zero is represented by ob_size == 0. 
807 808 where SHIFT can be either: 809 #define PyLong_SHIFT 30 810 #define PyLong_SHIFT 15 811 ''' 812 ob_size = long(self.field('ob_size')) 813 if ob_size == 0: 814 return 0 815 816 ob_digit = self.field('ob_digit') 817 818 if gdb.lookup_type('digit').sizeof == 2: 819 SHIFT = 15 820 else: 821 SHIFT = 30 822 823 digits = [long(ob_digit[i]) * 2**(SHIFT*i) 824 for i in safe_range(abs(ob_size))] 825 result = sum(digits) 826 if ob_size < 0: 827 result = -result 828 return result 829 830 def write_repr(self, out, visited): 831 # Write this out as a Python 3 int literal, i.e. without the "L" suffix 832 proxy = self.proxyval(visited) 833 out.write("%s" % proxy) 834 835 836class PyBoolObjectPtr(PyLongObjectPtr): 837 """ 838 Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two 839 <bool> instances (Py_True/Py_False) within the process being debugged. 840 """ 841 def proxyval(self, visited): 842 if PyLongObjectPtr.proxyval(self, visited): 843 return True 844 else: 845 return False 846 847class PyNoneStructPtr(PyObjectPtr): 848 """ 849 Class wrapping a gdb.Value that's a PyObject* pointing to the 850 singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type 851 """ 852 _typename = 'PyObject' 853 854 def proxyval(self, visited): 855 return None 856 857 858class PyFrameObjectPtr(PyObjectPtr): 859 _typename = 'PyFrameObject' 860 861 def __init__(self, gdbval, cast_to=None): 862 PyObjectPtr.__init__(self, gdbval, cast_to) 863 864 if not self.is_optimized_out(): 865 self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code')) 866 self.co_name = self.co.pyop_field('co_name') 867 self.co_filename = self.co.pyop_field('co_filename') 868 869 self.f_lineno = int_from_int(self.field('f_lineno')) 870 self.f_lasti = int_from_int(self.field('f_lasti')) 871 self.co_nlocals = int_from_int(self.co.field('co_nlocals')) 872 self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames')) 873 874 def iter_locals(self): 875 ''' 876 Yield a sequence of 
(name,value) pairs of PyObjectPtr instances, for 877 the local variables of this frame 878 ''' 879 if self.is_optimized_out(): 880 return 881 882 f_localsplus = self.field('f_localsplus') 883 for i in safe_range(self.co_nlocals): 884 pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i]) 885 if not pyop_value.is_null(): 886 pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i]) 887 yield (pyop_name, pyop_value) 888 889 def iter_globals(self): 890 ''' 891 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 892 the global variables of this frame 893 ''' 894 if self.is_optimized_out(): 895 return () 896 897 pyop_globals = self.pyop_field('f_globals') 898 return pyop_globals.iteritems() 899 900 def iter_builtins(self): 901 ''' 902 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 903 the builtin variables 904 ''' 905 if self.is_optimized_out(): 906 return () 907 908 pyop_builtins = self.pyop_field('f_builtins') 909 return pyop_builtins.iteritems() 910 911 def get_var_by_name(self, name): 912 ''' 913 Look for the named local variable, returning a (PyObjectPtr, scope) pair 914 where scope is a string 'local', 'global', 'builtin' 915 916 If not found, return (None, None) 917 ''' 918 for pyop_name, pyop_value in self.iter_locals(): 919 if name == pyop_name.proxyval(set()): 920 return pyop_value, 'local' 921 for pyop_name, pyop_value in self.iter_globals(): 922 if name == pyop_name.proxyval(set()): 923 return pyop_value, 'global' 924 for pyop_name, pyop_value in self.iter_builtins(): 925 if name == pyop_name.proxyval(set()): 926 return pyop_value, 'builtin' 927 return None, None 928 929 def filename(self): 930 '''Get the path of the current Python source file, as a string''' 931 if self.is_optimized_out(): 932 return FRAME_INFO_OPTIMIZED_OUT 933 return self.co_filename.proxyval(set()) 934 935 def current_line_num(self): 936 '''Get current line number as an integer (1-based) 937 938 Translated from PyFrame_GetLineNumber and 
PyCode_Addr2Line 939 940 See Objects/lnotab_notes.txt 941 ''' 942 if self.is_optimized_out(): 943 return None 944 f_trace = self.field('f_trace') 945 if long(f_trace) != 0: 946 # we have a non-NULL f_trace: 947 return self.f_lineno 948 949 try: 950 return self.co.addr2line(self.f_lasti*2) 951 except Exception: 952 # bpo-34989: addr2line() is a complex function, it can fail in many 953 # ways. For example, it fails with a TypeError on "FakeRepr" if 954 # gdb fails to load debug symbols. Use a catch-all "except 955 # Exception" to make the whole function safe. The caller has to 956 # handle None anyway for optimized Python. 957 return None 958 959 def current_line(self): 960 '''Get the text of the current source line as a string, with a trailing 961 newline character''' 962 if self.is_optimized_out(): 963 return FRAME_INFO_OPTIMIZED_OUT 964 965 lineno = self.current_line_num() 966 if lineno is None: 967 return '(failed to get frame line number)' 968 969 filename = self.filename() 970 try: 971 with open(os_fsencode(filename), 'r') as fp: 972 lines = fp.readlines() 973 except IOError: 974 return None 975 976 try: 977 # Convert from 1-based current_line_num to 0-based list offset 978 return lines[lineno - 1] 979 except IndexError: 980 return None 981 982 def write_repr(self, out, visited): 983 if self.is_optimized_out(): 984 out.write(FRAME_INFO_OPTIMIZED_OUT) 985 return 986 lineno = self.current_line_num() 987 lineno = str(lineno) if lineno is not None else "?" 
class PySetObjectPtr(PyObjectPtr):
    '''Wrapper for a (PySetObject*) in the inferior process; also used for
    objects whose tp_name is "frozenset".'''
    _typename = 'PySetObject'

    @classmethod
    def _dummy_key(cls):
        '''Return the gdb.Value of the inferior's _PySet_Dummy global.'''
        return gdb.lookup_global_symbol('_PySet_Dummy').value()

    def __iter__(self):
        '''Yield a PyObjectPtr for each occupied slot of the hash table.

        Slots whose key is NULL or equal to the dummy key are skipped.
        '''
        dummy = self._dummy_key()
        entries = self.field('table')
        slot_count = self.field('mask') + 1
        for slot in safe_range(slot_count):
            candidate = entries[slot]['key']
            is_live = candidate != 0 and candidate != dummy
            if is_live:
                yield PyObjectPtr.from_pyobject_ptr(candidate)

    def proxyval(self, visited):
        '''Build a local set (or frozenset) of the members' proxy values.

        visited is the set of addresses already seen in this traversal; a
        ProxyAlreadyVisited placeholder is returned on re-entry.
        '''
        address = self.as_address()
        # Guard against infinite loops:
        if address in visited:
            return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
        visited.add(address)

        members = (member.proxyval(visited) for member in self)
        factory = frozenset if self.safe_tp_name() == 'frozenset' else set
        return factory(members)

    def write_repr(self, out, visited):
        '''Write a Python 3 style repr of the set to the file-like out.'''
        # Emulate Python 3's set_repr
        type_name = self.safe_tp_name()
        address = self.as_address()

        # Guard against infinite loops:
        if address in visited:
            out.write('(...)')
            return
        visited.add(address)

        # Python 3's set_repr special-cases the empty set:
        if not self.field('used'):
            out.write(type_name)
            out.write('()')
            return

        # Python 3 uses {} for set literals; anything that is not exactly
        # "set" gets its type name wrapped around the braces:
        needs_wrapper = type_name != 'set'
        if needs_wrapper:
            out.write(type_name)
            out.write('(')

        out.write('{')
        for position, member in enumerate(self):
            if position:
                out.write(', ')
            member.write_repr(out, visited)
        out.write('}')

        if needs_wrapper:
            out.write(')')
return field_ob_item[i] 1129 1130 def proxyval(self, visited): 1131 # Guard against infinite loops: 1132 if self.as_address() in visited: 1133 return ProxyAlreadyVisited('(...)') 1134 visited.add(self.as_address()) 1135 1136 result = tuple(PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) 1137 for i in safe_range(int_from_int(self.field('ob_size')))) 1138 return result 1139 1140 def write_repr(self, out, visited): 1141 # Guard against infinite loops: 1142 if self.as_address() in visited: 1143 out.write('(...)') 1144 return 1145 visited.add(self.as_address()) 1146 1147 out.write('(') 1148 for i in safe_range(int_from_int(self.field('ob_size'))): 1149 if i > 0: 1150 out.write(', ') 1151 element = PyObjectPtr.from_pyobject_ptr(self[i]) 1152 element.write_repr(out, visited) 1153 if self.field('ob_size') == 1: 1154 out.write(',)') 1155 else: 1156 out.write(')') 1157 1158class PyTypeObjectPtr(PyObjectPtr): 1159 _typename = 'PyTypeObject' 1160 1161 1162def _unichr_is_printable(char): 1163 # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py 1164 if char == u" ": 1165 return True 1166 import unicodedata 1167 return unicodedata.category(char) not in ("C", "Z") 1168 1169if sys.maxunicode >= 0x10000: 1170 _unichr = unichr 1171else: 1172 # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb 1173 def _unichr(x): 1174 if x < 0x10000: 1175 return unichr(x) 1176 x -= 0x10000 1177 ch1 = 0xD800 | (x >> 10) 1178 ch2 = 0xDC00 | (x & 0x3FF) 1179 return unichr(ch1) + unichr(ch2) 1180 1181 1182class PyUnicodeObjectPtr(PyObjectPtr): 1183 _typename = 'PyUnicodeObject' 1184 1185 def char_width(self): 1186 _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE') 1187 return _type_Py_UNICODE.sizeof 1188 1189 def proxyval(self, visited): 1190 global _is_pep393 1191 if _is_pep393 is None: 1192 fields = gdb.lookup_type('PyUnicodeObject').fields() 1193 _is_pep393 = 'data' in [f.name for f in fields] 1194 if _is_pep393: 1195 # Python 3.3 and newer 1196 
may_have_surrogates = False 1197 compact = self.field('_base') 1198 ascii = compact['_base'] 1199 state = ascii['state'] 1200 is_compact_ascii = (int(state['ascii']) and int(state['compact'])) 1201 if not int(state['ready']): 1202 # string is not ready 1203 field_length = long(compact['wstr_length']) 1204 may_have_surrogates = True 1205 field_str = ascii['wstr'] 1206 else: 1207 field_length = long(ascii['length']) 1208 if is_compact_ascii: 1209 field_str = ascii.address + 1 1210 elif int(state['compact']): 1211 field_str = compact.address + 1 1212 else: 1213 field_str = self.field('data')['any'] 1214 repr_kind = int(state['kind']) 1215 if repr_kind == 1: 1216 field_str = field_str.cast(_type_unsigned_char_ptr()) 1217 elif repr_kind == 2: 1218 field_str = field_str.cast(_type_unsigned_short_ptr()) 1219 elif repr_kind == 4: 1220 field_str = field_str.cast(_type_unsigned_int_ptr()) 1221 else: 1222 # Python 3.2 and earlier 1223 field_length = long(self.field('length')) 1224 field_str = self.field('str') 1225 may_have_surrogates = self.char_width() == 2 1226 1227 # Gather a list of ints from the Py_UNICODE array; these are either 1228 # UCS-1, UCS-2 or UCS-4 code points: 1229 if not may_have_surrogates: 1230 Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] 1231 else: 1232 # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the 1233 # inferior process: we must join surrogate pairs. 1234 Py_UNICODEs = [] 1235 i = 0 1236 limit = safety_limit(field_length) 1237 while i < limit: 1238 ucs = int(field_str[i]) 1239 i += 1 1240 if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length: 1241 Py_UNICODEs.append(ucs) 1242 continue 1243 # This could be a surrogate pair. 
def write_repr(self, out, visited):
    '''Write this string to out as a Python 3 str literal (no "u" prefix).

    out is a file-like object with a write() method; visited is the set of
    addresses seen so far in the traversal and is passed on to proxyval().
    Quotes, backslashes, ASCII control characters (including DEL) and
    characters that are non-printable or not encodable in ENCODING are
    written as backslash escapes.
    '''
    # Get a local unicode object mirroring the inferior's string:
    proxy = self.proxyval(visited)

    # Transliteration of Python 3's Objects/unicodeobject.c:unicode_repr.
    # Use double quotes only if the text has a ' but no ":
    if "'" in proxy and '"' not in proxy:
        quote = '"'
    else:
        quote = "'"
    out.write(quote)

    i = 0
    while i < len(proxy):
        ch = proxy[i]
        i += 1

        # Escape quotes and backslashes
        if ch == quote or ch == '\\':
            out.write('\\')
            out.write(ch)

        # Map special whitespace to '\t', '\n', '\r'
        elif ch == '\t':
            out.write('\\t')
        elif ch == '\n':
            out.write('\\n')
        elif ch == '\r':
            out.write('\\r')

        # Map non-printable US ASCII to '\xhh'.  ch is a one-character
        # string, so it has to go through ord() to be compared with 0x7F
        # (DEL); comparing the string itself with an int never matches.
        elif ch < ' ' or ord(ch) == 0x7F:
            out.write('\\x')
            out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
            out.write(hexdigits[ord(ch) & 0x000F])

        # Copy ASCII characters as-is
        elif ord(ch) < 0x7F:
            out.write(ch)

        # Non-ASCII characters
        else:
            ucs = ch
            ch2 = None
            if sys.maxunicode < 0x10000:
                # If sizeof(Py_UNICODE) is 2 here (in gdb), join
                # surrogate pairs before calling _unichr_is_printable.
                if (i < len(proxy)
                        and 0xD800 <= ord(ch) < 0xDC00
                        and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                    ch2 = proxy[i]
                    ucs = ch + ch2
                    i += 1

            # Unfortunately, Python 2's unicode type doesn't seem
            # to expose the "isprintable" method
            printable = _unichr_is_printable(ucs)
            if printable:
                # A character the output encoding cannot represent is
                # escaped as well:
                try:
                    ucs.encode(ENCODING)
                except UnicodeEncodeError:
                    printable = False

            # Map Unicode whitespace and control characters
            # (categories Z* and C* except ASCII space)
            if not printable:
                if ch2 is not None:
                    # Match Python 3's representation of non-printable
                    # wide characters.
                    code = (ord(ch) & 0x03FF) << 10
                    code |= ord(ch2) & 0x03FF
                    code += 0x00010000
                else:
                    code = ord(ucs)

                # Map 8-bit characters to '\xhh'
                if code <= 0xff:
                    out.write('\\x')
                    shifts = (4, 0)
                # Map 21-bit characters to '\U00xxxxxx'
                elif code >= 0x10000:
                    out.write('\\U')
                    shifts = (28, 24, 20, 16, 12, 8, 4, 0)
                # Map 16-bit characters to '\uxxxx'
                else:
                    out.write('\\u')
                    shifts = (12, 8, 4, 0)
                # One write per hex digit, most significant first
                for shift in shifts:
                    out.write(hexdigits[(code >> shift) & 0xF])
            else:
                # Copy characters as-is
                out.write(ch)
                if ch2 is not None:
                    out.write(ch2)

    out.write(quote)
def stringify(val):
    '''Return the string used to display the proxy value val.

    This function isolates the choice of formatter in one place.
    '''
    # TODO: repr() puts everything on one line; pprint.pformat can be nicer,
    # but can lead to very long results, so repr() is used.
    return repr(val)
def pretty_printer_lookup(gdbval):
    '''Pretty-printer lookup hook: return a PyObjectPtrPrinter for gdbval
    if it is a pointer to one of the recognised struct types, else None.'''
    value_type = gdbval.type.unqualified()
    if value_type.code != gdb.TYPE_CODE_PTR:
        # Only pointers are handled
        return None

    # Name of the pointed-to type with qualifiers stripped
    target_name = str(value_type.target().unqualified())
    recognised = ("PyObject", "PyFrameObject", "PyUnicodeObject",
                  "wrapperobject")
    if target_name in recognised:
        return PyObjectPtrPrinter(gdbval)
    return None
def get_index(self):
    '''Calculate index of frame, starting at 0 for the newest frame within
    this thread.

    The index is the number of newer() steps needed before newer() returns
    a false value.
    '''
    index = 0
    # Go down until you reach the newest frame, asking each frame for its
    # newer neighbour exactly once:
    newer_frame = self.newer()
    while newer_frame:
        index += 1
        newer_frame = newer_frame.newer()
    return index
def is_waiting_for_gil(self):
    '''Is this frame waiting on the GIL?

    Returns True when the underlying gdb frame's function name is
    "take_gil" and False otherwise, including when the frame has no name.
    '''
    # This assumes the _POSIX_THREADS version of Python/ceval_gil.h:
    return self._gdbframe.name() == 'take_gil'
@classmethod
def get_selected_bytecode_frame(cls):
    '''Try to obtain the Frame for the python bytecode interpreter in the
    selected GDB frame, or None.

    Starting at the selected frame, walk towards older frames and return
    the first one for which is_evalframe() is true.  None is returned when
    no such frame exists or when gdb reports an error while fetching the
    selected frame.
    '''
    try:
        frame = cls.get_selected_frame()
    except gdb.error:
        # No frame: Python didn't start yet (same handling as
        # get_selected_python_frame)
        return None

    while frame:
        if frame.is_evalframe():
            return frame
        frame = frame.older()

    # Not found:
    return None
python frame information)\n' % self.get_index()) 1691 else: 1692 info = self.is_other_python_frame() 1693 if info: 1694 sys.stdout.write('#%i %s\n' % (self.get_index(), info)) 1695 else: 1696 sys.stdout.write('#%i\n' % self.get_index()) 1697 1698 def print_traceback(self): 1699 if self.is_evalframe(): 1700 pyop = self.get_pyop() 1701 if pyop: 1702 pyop.print_traceback() 1703 if not pyop.is_optimized_out(): 1704 line = pyop.current_line() 1705 if line is not None: 1706 sys.stdout.write(' %s\n' % line.strip()) 1707 else: 1708 sys.stdout.write(' (unable to read python frame information)\n') 1709 else: 1710 info = self.is_other_python_frame() 1711 if info: 1712 sys.stdout.write(' %s\n' % info) 1713 else: 1714 sys.stdout.write(' (not a python frame)\n') 1715 1716class PyList(gdb.Command): 1717 '''List the current Python source code, if any 1718 1719 Use 1720 py-list START 1721 to list at a different line number within the python source. 1722 1723 Use 1724 py-list START, END 1725 to list a specific range of lines within the python source. 
def move_in_stack(move_up):
    '''Select the nearest python frame above or below the current one.

    Shared implementation of the py-up/py-down commands: move_up=True
    walks towards older frames, move_up=False towards newer ones.  The
    first python frame found is selected and, if selection succeeded, its
    summary is printed; otherwise a message is printed.
    '''
    current = Frame.get_selected_python_frame()
    if not current:
        print('Unable to locate python frame')
        return

    while True:
        candidate = current.older() if move_up else current.newer()
        if not candidate:
            # Ran off the end of the stack without finding a python frame
            break

        if candidate.is_python_frame():
            # Found it; only print when gdb could actually select it
            if candidate.select():
                candidate.print_summary()
            return

        current = candidate

    print('Unable to find an older python frame' if move_up
          else 'Unable to find a newer python frame')
class PyLocals(gdb.Command):
    'Print the local variables of the selected python frame'
    def __init__(self):
        gdb.Command.__init__(self,
                             "py-locals",
                             gdb.COMMAND_DATA,
                             gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        '''Print "name = value" for every local of the selected python
        frame; args is not used by this command.'''
        frame = Frame.get_selected_python_frame()
        if not frame:
            print('Unable to locate python frame')
            return

        pyop_frame = frame.get_pyop()
        if not pyop_frame:
            print(UNABLE_READ_INFO_PYTHON_FRAME)
            return

        for pyop_name, pyop_value in pyop_frame.iter_locals():
            # Each name gets a fresh "visited" set; values are truncated to
            # MAX_OUTPUT_LEN characters
            print('%s = %s'
                  % (pyop_name.proxyval(set()),
                     pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)))