1#!/usr/bin/python 2''' 3From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb 4to be extended with Python code e.g. for library-specific data visualizations, 5such as for the C++ STL types. Documentation on this API can be seen at: 6http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html 7 8 9This python module deals with the case when the process being debugged (the 10"inferior process" in gdb parlance) is itself python, or more specifically, 11linked against libpython. In this situation, almost every item of data is a 12(PyObject*), and having the debugger merely print their addresses is not very 13enlightening. 14 15This module embeds knowledge about the implementation details of libpython so 16that we can emit useful visualizations e.g. a string, a list, a dict, a frame 17giving file/line information and the state of local variables 18 19In particular, given a gdb.Value corresponding to a PyObject* in the inferior 20process, we can generate a "proxy value" within the gdb process. For example, 21given a PyObject* in the inferior process that is in fact a PyListObject* 22holding three PyObject* that turn out to be PyBytesObject* instances, we can 23generate a proxy value within the gdb process that is a list of bytes 24instances: 25 [b"foo", b"bar", b"baz"] 26 27Doing so can be expensive for complicated graphs of objects, and could take 28some time, so we also have a "write_repr" method that writes a representation 29of the data to a file-like object. This allows us to stop the traversal by 30having the file-like object raise an exception if it gets too much data. 31 32With both "proxyval" and "write_repr" we keep track of the set of all addresses 33visited so far in the traversal, to avoid infinite recursion due to cycles in 34the graph of object references. 35 36We try to defer gdb.lookup_type() invocations for python types until as late as 37possible: for a dynamically linked python binary, when the process starts in 38the debugger, the libpython.so hasn't been dynamically loaded yet, so none of 39the type names are known to the debugger 40 41The module also extends gdb with some python-specific commands. 42''' 43 44# NOTE: some gdbs are linked with Python 3, so this file should be dual-syntax 45# compatible (2.6+ and 3.0+). See #19308. 46 47from __future__ import print_function 48import gdb 49import os 50import locale 51import sys 52 53if sys.version_info[0] >= 3: 54 unichr = chr 55 xrange = range 56 long = int 57 58# Look up the gdb.Type for some standard types: 59# Those need to be refreshed as types (pointer sizes) may change when 60# gdb loads different executables 61 62def _type_char_ptr(): 63 return gdb.lookup_type('char').pointer() # char* 64 65 66def _type_unsigned_char_ptr(): 67 return gdb.lookup_type('unsigned char').pointer() # unsigned char* 68 69 70def _type_unsigned_short_ptr(): 71 return gdb.lookup_type('unsigned short').pointer() 72 73 74def _type_unsigned_int_ptr(): 75 return gdb.lookup_type('unsigned int').pointer() 76 77 78def _sizeof_void_p(): 79 return gdb.lookup_type('void').pointer().sizeof 80 81 82# value computed later, see PyUnicodeObjectPtr.proxy() 83_is_pep393 = None 84 85Py_TPFLAGS_HEAPTYPE = (1 << 9) 86Py_TPFLAGS_LONG_SUBCLASS = (1 << 24) 87Py_TPFLAGS_LIST_SUBCLASS = (1 << 25) 88Py_TPFLAGS_TUPLE_SUBCLASS = (1 << 26) 89Py_TPFLAGS_BYTES_SUBCLASS = (1 << 27) 90Py_TPFLAGS_UNICODE_SUBCLASS = (1 << 28) 91Py_TPFLAGS_DICT_SUBCLASS = (1 << 29) 92Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30) 93Py_TPFLAGS_TYPE_SUBCLASS = (1 << 31) 94 95 96MAX_OUTPUT_LEN=1024 97 98hexdigits = "0123456789abcdef" 99 100ENCODING = locale.getpreferredencoding() 101 102class NullPyObjectPtr(RuntimeError): 103 pass 104 105 106def safety_limit(val): 107 # Given an integer value from the process being debugged, limit it to some 108 # safety threshold so that arbitrary breakage within said process doesn't 109 # break the gdb process too much (e.g. sizes of iterations, sizes of lists) 110 return min(val, 1000) 111 112 113def safe_range(val): 114 # As per range, but don't trust the value too much: cap it to a safety 115 # threshold in case the data was corrupted 116 return xrange(safety_limit(int(val))) 117 118if sys.version_info[0] >= 3: 119 def write_unicode(file, text): 120 file.write(text) 121else: 122 def write_unicode(file, text): 123 # Write a byte or unicode string to file. Unicode strings are encoded to 124 # ENCODING encoding with 'backslashreplace' error handler to avoid 125 # UnicodeEncodeError. 126 if isinstance(text, unicode): 127 text = text.encode(ENCODING, 'backslashreplace') 128 file.write(text) 129 130try: 131 os_fsencode = os.fsencode 132except AttributeError: 133 def os_fsencode(filename): 134 if not isinstance(filename, unicode): 135 return filename 136 encoding = sys.getfilesystemencoding() 137 if encoding == 'mbcs': 138 # mbcs doesn't support surrogateescape 139 return filename.encode(encoding) 140 encoded = [] 141 for char in filename: 142 # surrogateescape error handler 143 if 0xDC80 <= ord(char) <= 0xDCFF: 144 byte = chr(ord(char) - 0xDC00) 145 else: 146 byte = char.encode(encoding) 147 encoded.append(byte) 148 return ''.join(encoded) 149 150class StringTruncated(RuntimeError): 151 pass 152 153class TruncatedStringIO(object): 154 '''Similar to io.StringIO, but can truncate the output by raising a 155 StringTruncated exception''' 156 def __init__(self, maxlen=None): 157 self._val = '' 158 self.maxlen = maxlen 159 160 def write(self, data): 161 if self.maxlen: 162 if len(data) + len(self._val) > self.maxlen: 163 # Truncation: 164 self._val += data[0:self.maxlen - len(self._val)] 165 raise StringTruncated() 166 167 self._val += data 168 169 def getvalue(self): 170 return self._val 171 172class PyObjectPtr(object): 173 """ 174 Class wrapping a gdb.Value that's either a (PyObject*) within the 175 inferior process, or some subclass pointer e.g. (PyBytesObject*) 176 177 There will be a subclass for every refined PyObject type that we care 178 about. 179 180 Note that at every stage the underlying pointer could be NULL, point 181 to corrupt data, etc; this is the debugger, after all. 182 """ 183 _typename = 'PyObject' 184 185 def __init__(self, gdbval, cast_to=None): 186 if cast_to: 187 self._gdbval = gdbval.cast(cast_to) 188 else: 189 self._gdbval = gdbval 190 191 def field(self, name): 192 ''' 193 Get the gdb.Value for the given field within the PyObject, coping with 194 some python 2 versus python 3 differences. 195 196 Various libpython types are defined using the "PyObject_HEAD" and 197 "PyObject_VAR_HEAD" macros. 198 199 In Python 2, this these are defined so that "ob_type" and (for a var 200 object) "ob_size" are fields of the type in question. 201 202 In Python 3, this is defined as an embedded PyVarObject type thus: 203 PyVarObject ob_base; 204 so that the "ob_size" field is located insize the "ob_base" field, and 205 the "ob_type" is most easily accessed by casting back to a (PyObject*). 206 ''' 207 if self.is_null(): 208 raise NullPyObjectPtr(self) 209 210 if name == 'ob_type': 211 pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type()) 212 return pyo_ptr.dereference()[name] 213 214 if name == 'ob_size': 215 pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type()) 216 return pyo_ptr.dereference()[name] 217 218 # General case: look it up inside the object: 219 return self._gdbval.dereference()[name] 220 221 def pyop_field(self, name): 222 ''' 223 Get a PyObjectPtr for the given PyObject* field within this PyObject, 224 coping with some python 2 versus python 3 differences. 225 ''' 226 return PyObjectPtr.from_pyobject_ptr(self.field(name)) 227 228 def write_field_repr(self, name, out, visited): 229 ''' 230 Extract the PyObject* field named "name", and write its representation 231 to file-like object "out" 232 ''' 233 field_obj = self.pyop_field(name) 234 field_obj.write_repr(out, visited) 235 236 def get_truncated_repr(self, maxlen): 237 ''' 238 Get a repr-like string for the data, but truncate it at "maxlen" bytes 239 (ending the object graph traversal as soon as you do) 240 ''' 241 out = TruncatedStringIO(maxlen) 242 try: 243 self.write_repr(out, set()) 244 except StringTruncated: 245 # Truncation occurred: 246 return out.getvalue() + '...(truncated)' 247 248 # No truncation occurred: 249 return out.getvalue() 250 251 def type(self): 252 return PyTypeObjectPtr(self.field('ob_type')) 253 254 def is_null(self): 255 return 0 == long(self._gdbval) 256 257 def is_optimized_out(self): 258 ''' 259 Is the value of the underlying PyObject* visible to the debugger? 260 261 This can vary with the precise version of the compiler used to build 262 Python, and the precise version of gdb. 263 264 See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with 265 PyEval_EvalFrameEx's "f" 266 ''' 267 return self._gdbval.is_optimized_out 268 269 def safe_tp_name(self): 270 try: 271 return self.type().field('tp_name').string() 272 except NullPyObjectPtr: 273 # NULL tp_name? 274 return 'unknown' 275 except RuntimeError: 276 # Can't even read the object at all? 277 return 'unknown' 278 279 def proxyval(self, visited): 280 ''' 281 Scrape a value from the inferior process, and try to represent it 282 within the gdb process, whilst (hopefully) avoiding crashes when 283 the remote data is corrupt. 284 285 Derived classes will override this. 286 287 For example, a PyIntObject* with ob_ival 42 in the inferior process 288 should result in an int(42) in this process. 289 290 visited: a set of all gdb.Value pyobject pointers already visited 291 whilst generating this value (to guard against infinite recursion when 292 visiting object graphs with loops). Analogous to Py_ReprEnter and 293 Py_ReprLeave 294 ''' 295 296 class FakeRepr(object): 297 """ 298 Class representing a non-descript PyObject* value in the inferior 299 process for when we don't have a custom scraper, intended to have 300 a sane repr(). 301 """ 302 303 def __init__(self, tp_name, address): 304 self.tp_name = tp_name 305 self.address = address 306 307 def __repr__(self): 308 # For the NULL pointer, we have no way of knowing a type, so 309 # special-case it as per 310 # http://bugs.python.org/issue8032#msg100882 311 if self.address == 0: 312 return '0x0' 313 return '<%s at remote 0x%x>' % (self.tp_name, self.address) 314 315 return FakeRepr(self.safe_tp_name(), 316 long(self._gdbval)) 317 318 def write_repr(self, out, visited): 319 ''' 320 Write a string representation of the value scraped from the inferior 321 process to "out", a file-like object. 322 ''' 323 # Default implementation: generate a proxy value and write its repr 324 # However, this could involve a lot of work for complicated objects, 325 # so for derived classes we specialize this 326 return out.write(repr(self.proxyval(visited))) 327 328 @classmethod 329 def subclass_from_type(cls, t): 330 ''' 331 Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a 332 (PyTypeObject*), determine the corresponding subclass of PyObjectPtr 333 to use 334 335 Ideally, we would look up the symbols for the global types, but that 336 isn't working yet: 337 (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value 338 Traceback (most recent call last): 339 File "<string>", line 1, in <module> 340 NotImplementedError: Symbol type not yet supported in Python scripts. 341 Error while executing Python code. 342 343 For now, we use tp_flags, after doing some string comparisons on the 344 tp_name for some special-cases that don't seem to be visible through 345 flags 346 ''' 347 try: 348 tp_name = t.field('tp_name').string() 349 tp_flags = int(t.field('tp_flags')) 350 except RuntimeError: 351 # Handle any kind of error e.g. NULL ptrs by simply using the base 352 # class 353 return cls 354 355 #print('tp_flags = 0x%08x' % tp_flags) 356 #print('tp_name = %r' % tp_name) 357 358 name_map = {'bool': PyBoolObjectPtr, 359 'classobj': PyClassObjectPtr, 360 'NoneType': PyNoneStructPtr, 361 'frame': PyFrameObjectPtr, 362 'set' : PySetObjectPtr, 363 'frozenset' : PySetObjectPtr, 364 'builtin_function_or_method' : PyCFunctionObjectPtr, 365 } 366 if tp_name in name_map: 367 return name_map[tp_name] 368 369 if tp_flags & Py_TPFLAGS_HEAPTYPE: 370 return HeapTypeObjectPtr 371 372 if tp_flags & Py_TPFLAGS_LONG_SUBCLASS: 373 return PyLongObjectPtr 374 if tp_flags & Py_TPFLAGS_LIST_SUBCLASS: 375 return PyListObjectPtr 376 if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS: 377 return PyTupleObjectPtr 378 if tp_flags & Py_TPFLAGS_BYTES_SUBCLASS: 379 return PyBytesObjectPtr 380 if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS: 381 return PyUnicodeObjectPtr 382 if tp_flags & Py_TPFLAGS_DICT_SUBCLASS: 383 return PyDictObjectPtr 384 if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS: 385 return PyBaseExceptionObjectPtr 386 #if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS: 387 # return PyTypeObjectPtr 388 389 # Use the base class: 390 return cls 391 392 @classmethod 393 def from_pyobject_ptr(cls, gdbval): 394 ''' 395 Try to locate the appropriate derived class dynamically, and cast 396 the pointer accordingly. 397 ''' 398 try: 399 p = PyObjectPtr(gdbval) 400 cls = cls.subclass_from_type(p.type()) 401 return cls(gdbval, cast_to=cls.get_gdb_type()) 402 except RuntimeError: 403 # Handle any kind of error e.g. NULL ptrs by simply using the base 404 # class 405 pass 406 return cls(gdbval) 407 408 @classmethod 409 def get_gdb_type(cls): 410 return gdb.lookup_type(cls._typename).pointer() 411 412 def as_address(self): 413 return long(self._gdbval) 414 415class PyVarObjectPtr(PyObjectPtr): 416 _typename = 'PyVarObject' 417 418class ProxyAlreadyVisited(object): 419 ''' 420 Placeholder proxy to use when protecting against infinite recursion due to 421 loops in the object graph. 422 423 Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave 424 ''' 425 def __init__(self, rep): 426 self._rep = rep 427 428 def __repr__(self): 429 return self._rep 430 431 432def _write_instance_repr(out, visited, name, pyop_attrdict, address): 433 '''Shared code for use by all classes: 434 write a representation to file-like object "out"''' 435 out.write('<') 436 out.write(name) 437 438 # Write dictionary of instance attributes: 439 if isinstance(pyop_attrdict, PyDictObjectPtr): 440 out.write('(') 441 first = True 442 for pyop_arg, pyop_val in pyop_attrdict.iteritems(): 443 if not first: 444 out.write(', ') 445 first = False 446 out.write(pyop_arg.proxyval(visited)) 447 out.write('=') 448 pyop_val.write_repr(out, visited) 449 out.write(')') 450 out.write(' at remote 0x%x>' % address) 451 452 453class InstanceProxy(object): 454 455 def __init__(self, cl_name, attrdict, address): 456 self.cl_name = cl_name 457 self.attrdict = attrdict 458 self.address = address 459 460 def __repr__(self): 461 if isinstance(self.attrdict, dict): 462 kwargs = ', '.join(["%s=%r" % (arg, val) 463 for arg, val in self.attrdict.iteritems()]) 464 return '<%s(%s) at remote 0x%x>' % (self.cl_name, 465 kwargs, self.address) 466 else: 467 return '<%s at remote 0x%x>' % (self.cl_name, 468 self.address) 469 470def _PyObject_VAR_SIZE(typeobj, nitems): 471 if _PyObject_VAR_SIZE._type_size_t is None: 472 _PyObject_VAR_SIZE._type_size_t = gdb.lookup_type('size_t') 473 474 return ( ( typeobj.field('tp_basicsize') + 475 nitems * typeobj.field('tp_itemsize') + 476 (_sizeof_void_p() - 1) 477 ) & ~(_sizeof_void_p() - 1) 478 ).cast(_PyObject_VAR_SIZE._type_size_t) 479_PyObject_VAR_SIZE._type_size_t = None 480 481class HeapTypeObjectPtr(PyObjectPtr): 482 _typename = 'PyObject' 483 484 def get_attr_dict(self): 485 ''' 486 Get the PyDictObject ptr representing the attribute dictionary 487 (or None if there's a problem) 488 ''' 489 try: 490 typeobj = self.type() 491 dictoffset = int_from_int(typeobj.field('tp_dictoffset')) 492 if dictoffset != 0: 493 if dictoffset < 0: 494 type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer() 495 tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size']) 496 if tsize < 0: 497 tsize = -tsize 498 size = _PyObject_VAR_SIZE(typeobj, tsize) 499 dictoffset += size 500 assert dictoffset > 0 501 assert dictoffset % _sizeof_void_p() == 0 502 503 dictptr = self._gdbval.cast(_type_char_ptr()) + dictoffset 504 PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer() 505 dictptr = dictptr.cast(PyObjectPtrPtr) 506 return PyObjectPtr.from_pyobject_ptr(dictptr.dereference()) 507 except RuntimeError: 508 # Corrupt data somewhere; fail safe 509 pass 510 511 # Not found, or some kind of error: 512 return None 513 514 def proxyval(self, visited): 515 ''' 516 Support for classes. 517 518 Currently we just locate the dictionary using a transliteration to 519 python of _PyObject_GetDictPtr, ignoring descriptors 520 ''' 521 # Guard against infinite loops: 522 if self.as_address() in visited: 523 return ProxyAlreadyVisited('<...>') 524 visited.add(self.as_address()) 525 526 pyop_attr_dict = self.get_attr_dict() 527 if pyop_attr_dict: 528 attr_dict = pyop_attr_dict.proxyval(visited) 529 else: 530 attr_dict = {} 531 tp_name = self.safe_tp_name() 532 533 # Class: 534 return InstanceProxy(tp_name, attr_dict, long(self._gdbval)) 535 536 def write_repr(self, out, visited): 537 # Guard against infinite loops: 538 if self.as_address() in visited: 539 out.write('<...>') 540 return 541 visited.add(self.as_address()) 542 543 pyop_attrdict = self.get_attr_dict() 544 _write_instance_repr(out, visited, 545 self.safe_tp_name(), pyop_attrdict, self.as_address()) 546 547class ProxyException(Exception): 548 def __init__(self, tp_name, args): 549 self.tp_name = tp_name 550 self.args = args 551 552 def __repr__(self): 553 return '%s%r' % (self.tp_name, self.args) 554 555class PyBaseExceptionObjectPtr(PyObjectPtr): 556 """ 557 Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception 558 within the process being debugged. 559 """ 560 _typename = 'PyBaseExceptionObject' 561 562 def proxyval(self, visited): 563 # Guard against infinite loops: 564 if self.as_address() in visited: 565 return ProxyAlreadyVisited('(...)') 566 visited.add(self.as_address()) 567 arg_proxy = self.pyop_field('args').proxyval(visited) 568 return ProxyException(self.safe_tp_name(), 569 arg_proxy) 570 571 def write_repr(self, out, visited): 572 # Guard against infinite loops: 573 if self.as_address() in visited: 574 out.write('(...)') 575 return 576 visited.add(self.as_address()) 577 578 out.write(self.safe_tp_name()) 579 self.write_field_repr('args', out, visited) 580 581class PyClassObjectPtr(PyObjectPtr): 582 """ 583 Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj> 584 instance within the process being debugged. 585 """ 586 _typename = 'PyClassObject' 587 588 589class BuiltInFunctionProxy(object): 590 def __init__(self, ml_name): 591 self.ml_name = ml_name 592 593 def __repr__(self): 594 return "<built-in function %s>" % self.ml_name 595 596class BuiltInMethodProxy(object): 597 def __init__(self, ml_name, pyop_m_self): 598 self.ml_name = ml_name 599 self.pyop_m_self = pyop_m_self 600 601 def __repr__(self): 602 return ('<built-in method %s of %s object at remote 0x%x>' 603 % (self.ml_name, 604 self.pyop_m_self.safe_tp_name(), 605 self.pyop_m_self.as_address()) 606 ) 607 608class PyCFunctionObjectPtr(PyObjectPtr): 609 """ 610 Class wrapping a gdb.Value that's a PyCFunctionObject* 611 (see Include/methodobject.h and Objects/methodobject.c) 612 """ 613 _typename = 'PyCFunctionObject' 614 615 def proxyval(self, visited): 616 m_ml = self.field('m_ml') # m_ml is a (PyMethodDef*) 617 ml_name = m_ml['ml_name'].string() 618 619 pyop_m_self = self.pyop_field('m_self') 620 if pyop_m_self.is_null(): 621 return BuiltInFunctionProxy(ml_name) 622 else: 623 return BuiltInMethodProxy(ml_name, pyop_m_self) 624 625 626class PyCodeObjectPtr(PyObjectPtr): 627 """ 628 Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance 629 within the process being debugged. 630 """ 631 _typename = 'PyCodeObject' 632 633 def addr2line(self, addrq): 634 ''' 635 Get the line number for a given bytecode offset 636 637 Analogous to PyCode_Addr2Line; translated from pseudocode in 638 Objects/lnotab_notes.txt 639 ''' 640 co_lnotab = self.pyop_field('co_lnotab').proxyval(set()) 641 642 # Initialize lineno to co_firstlineno as per PyCode_Addr2Line 643 # not 0, as lnotab_notes.txt has it: 644 lineno = int_from_int(self.field('co_firstlineno')) 645 646 addr = 0 647 for addr_incr, line_incr in zip(co_lnotab[::2], co_lnotab[1::2]): 648 addr += ord(addr_incr) 649 if addr > addrq: 650 return lineno 651 lineno += ord(line_incr) 652 return lineno 653 654 655class PyDictObjectPtr(PyObjectPtr): 656 """ 657 Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance 658 within the process being debugged. 659 """ 660 _typename = 'PyDictObject' 661 662 def iteritems(self): 663 ''' 664 Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs, 665 analogous to dict.iteritems() 666 ''' 667 keys = self.field('ma_keys') 668 values = self.field('ma_values') 669 entries, nentries = self._get_entries(keys) 670 for i in safe_range(nentries): 671 ep = entries[i] 672 if long(values): 673 pyop_value = PyObjectPtr.from_pyobject_ptr(values[i]) 674 else: 675 pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value']) 676 if not pyop_value.is_null(): 677 pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key']) 678 yield (pyop_key, pyop_value) 679 680 def proxyval(self, visited): 681 # Guard against infinite loops: 682 if self.as_address() in visited: 683 return ProxyAlreadyVisited('{...}') 684 visited.add(self.as_address()) 685 686 result = {} 687 for pyop_key, pyop_value in self.iteritems(): 688 proxy_key = pyop_key.proxyval(visited) 689 proxy_value = pyop_value.proxyval(visited) 690 result[proxy_key] = proxy_value 691 return result 692 693 def write_repr(self, out, visited): 694 # Guard against infinite loops: 695 if self.as_address() in visited: 696 out.write('{...}') 697 return 698 visited.add(self.as_address()) 699 700 out.write('{') 701 first = True 702 for pyop_key, pyop_value in self.iteritems(): 703 if not first: 704 out.write(', ') 705 first = False 706 pyop_key.write_repr(out, visited) 707 out.write(': ') 708 pyop_value.write_repr(out, visited) 709 out.write('}') 710 711 def _get_entries(self, keys): 712 dk_nentries = int(keys['dk_nentries']) 713 dk_size = int(keys['dk_size']) 714 try: 715 # <= Python 3.5 716 return keys['dk_entries'], dk_size 717 except gdb.error: 718 # >= Python 3.6 719 pass 720 721 if dk_size <= 0xFF: 722 offset = dk_size 723 elif dk_size <= 0xFFFF: 724 offset = 2 * dk_size 725 elif dk_size <= 0xFFFFFFFF: 726 offset = 4 * dk_size 727 else: 728 offset = 8 * dk_size 729 730 ent_addr = keys['dk_indices']['as_1'].address 731 ent_addr = ent_addr.cast(_type_unsigned_char_ptr()) + offset 732 ent_ptr_t = gdb.lookup_type('PyDictKeyEntry').pointer() 733 ent_addr = ent_addr.cast(ent_ptr_t) 734 735 return ent_addr, dk_nentries 736 737 738class PyListObjectPtr(PyObjectPtr): 739 _typename = 'PyListObject' 740 741 def __getitem__(self, i): 742 # Get the gdb.Value for the (PyObject*) with the given index: 743 field_ob_item = self.field('ob_item') 744 return field_ob_item[i] 745 746 def proxyval(self, visited): 747 # Guard against infinite loops: 748 if self.as_address() in visited: 749 return ProxyAlreadyVisited('[...]') 750 visited.add(self.as_address()) 751 752 result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) 753 for i in safe_range(int_from_int(self.field('ob_size')))] 754 return result 755 756 def write_repr(self, out, visited): 757 # Guard against infinite loops: 758 if self.as_address() in visited: 759 out.write('[...]') 760 return 761 visited.add(self.as_address()) 762 763 out.write('[') 764 for i in safe_range(int_from_int(self.field('ob_size'))): 765 if i > 0: 766 out.write(', ') 767 element = PyObjectPtr.from_pyobject_ptr(self[i]) 768 element.write_repr(out, visited) 769 out.write(']') 770 771class PyLongObjectPtr(PyObjectPtr): 772 _typename = 'PyLongObject' 773 774 def proxyval(self, visited): 775 ''' 776 Python's Include/longobjrep.h has this declaration: 777 struct _longobject { 778 PyObject_VAR_HEAD 779 digit ob_digit[1]; 780 }; 781 782 with this description: 783 The absolute value of a number is equal to 784 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i) 785 Negative numbers are represented with ob_size < 0; 786 zero is represented by ob_size == 0. 787 788 where SHIFT can be either: 789 #define PyLong_SHIFT 30 790 #define PyLong_SHIFT 15 791 ''' 792 ob_size = long(self.field('ob_size')) 793 if ob_size == 0: 794 return 0 795 796 ob_digit = self.field('ob_digit') 797 798 if gdb.lookup_type('digit').sizeof == 2: 799 SHIFT = 15 800 else: 801 SHIFT = 30 802 803 digits = [long(ob_digit[i]) * 2**(SHIFT*i) 804 for i in safe_range(abs(ob_size))] 805 result = sum(digits) 806 if ob_size < 0: 807 result = -result 808 return result 809 810 def write_repr(self, out, visited): 811 # Write this out as a Python 3 int literal, i.e. without the "L" suffix 812 proxy = self.proxyval(visited) 813 out.write("%s" % proxy) 814 815 816class PyBoolObjectPtr(PyLongObjectPtr): 817 """ 818 Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two 819 <bool> instances (Py_True/Py_False) within the process being debugged. 820 """ 821 def proxyval(self, visited): 822 if PyLongObjectPtr.proxyval(self, visited): 823 return True 824 else: 825 return False 826 827class PyNoneStructPtr(PyObjectPtr): 828 """ 829 Class wrapping a gdb.Value that's a PyObject* pointing to the 830 singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type 831 """ 832 _typename = 'PyObject' 833 834 def proxyval(self, visited): 835 return None 836 837 838class PyFrameObjectPtr(PyObjectPtr): 839 _typename = 'PyFrameObject' 840 841 def __init__(self, gdbval, cast_to=None): 842 PyObjectPtr.__init__(self, gdbval, cast_to) 843 844 if not self.is_optimized_out(): 845 self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code')) 846 self.co_name = self.co.pyop_field('co_name') 847 self.co_filename = self.co.pyop_field('co_filename') 848 849 self.f_lineno = int_from_int(self.field('f_lineno')) 850 self.f_lasti = int_from_int(self.field('f_lasti')) 851 self.co_nlocals = int_from_int(self.co.field('co_nlocals')) 852 self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames')) 853 854 def iter_locals(self): 855 ''' 856 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 857 the local variables of this frame 858 ''' 859 if self.is_optimized_out(): 860 return 861 862 f_localsplus = self.field('f_localsplus') 863 for i in safe_range(self.co_nlocals): 864 pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i]) 865 if not pyop_value.is_null(): 866 pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i]) 867 yield (pyop_name, pyop_value) 868 869 def iter_globals(self): 870 ''' 871 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 872 the global variables of this frame 873 ''' 874 if self.is_optimized_out(): 875 return () 876 877 pyop_globals = self.pyop_field('f_globals') 878 return pyop_globals.iteritems() 879 880 def iter_builtins(self): 881 ''' 882 Yield a sequence of (name,value) pairs of PyObjectPtr instances, for 883 the builtin variables 884 ''' 885 if self.is_optimized_out(): 886 return () 887 888 pyop_builtins = self.pyop_field('f_builtins') 889 return pyop_builtins.iteritems() 890 891 def get_var_by_name(self, name): 892 ''' 893 Look for the named local variable, returning a (PyObjectPtr, scope) pair 894 where scope is a string 'local', 'global', 'builtin' 895 896 If not found, return (None, None) 897 ''' 898 for pyop_name, pyop_value in self.iter_locals(): 899 if name == pyop_name.proxyval(set()): 900 return pyop_value, 'local' 901 for pyop_name, pyop_value in self.iter_globals(): 902 if name == pyop_name.proxyval(set()): 903 return pyop_value, 'global' 904 for pyop_name, pyop_value in self.iter_builtins(): 905 if name == pyop_name.proxyval(set()): 906 return pyop_value, 'builtin' 907 return None, None 908 909 def filename(self): 910 '''Get the path of the current Python source file, as a string''' 911 if self.is_optimized_out(): 912 return '(frame information optimized out)' 913 return self.co_filename.proxyval(set()) 914 915 def current_line_num(self): 916 '''Get current line number as an integer (1-based) 917 918 Translated from PyFrame_GetLineNumber and PyCode_Addr2Line 919 920 See Objects/lnotab_notes.txt 921 ''' 922 if self.is_optimized_out(): 923 return None 924 f_trace = self.field('f_trace') 925 if long(f_trace) != 0: 926 # we have a non-NULL f_trace: 927 return self.f_lineno 928 else: 929 #try: 930 return self.co.addr2line(self.f_lasti) 931 #except ValueError: 932 # return self.f_lineno 933 934 def current_line(self): 935 '''Get the text of the current source line as a string, with a trailing 936 newline character''' 937 if self.is_optimized_out(): 938 return '(frame information optimized out)' 939 filename = self.filename() 940 try: 941 f = open(os_fsencode(filename), 'r') 942 except IOError: 943 return None 944 with f: 945 all_lines = f.readlines() 946 # Convert from 1-based current_line_num to 0-based list offset: 947 return all_lines[self.current_line_num()-1] 948 949 def write_repr(self, out, visited): 950 if self.is_optimized_out(): 951 out.write('(frame information optimized out)') 952 return 953 out.write('Frame 0x%x, for file %s, line %i, in %s (' 954 % (self.as_address(), 955 self.co_filename.proxyval(visited), 956 self.current_line_num(), 957 self.co_name.proxyval(visited))) 958 first = True 959 for pyop_name, pyop_value in self.iter_locals(): 960 if not first: 961 out.write(', ') 962 first = False 963 964 out.write(pyop_name.proxyval(visited)) 965 out.write('=') 966 pyop_value.write_repr(out, visited) 967 968 out.write(')') 969 970 def print_traceback(self): 971 if self.is_optimized_out(): 972 sys.stdout.write(' (frame information optimized out)\n') 973 return 974 visited = set() 975 sys.stdout.write(' File "%s", line %i, in %s\n' 976 % (self.co_filename.proxyval(visited), 977 self.current_line_num(), 978 self.co_name.proxyval(visited))) 979 980class PySetObjectPtr(PyObjectPtr): 981 _typename = 'PySetObject' 982 983 @classmethod 984 def _dummy_key(self): 985 return gdb.lookup_global_symbol('_PySet_Dummy').value() 986 987 def __iter__(self): 988 dummy_ptr = self._dummy_key() 989 table = self.field('table') 990 for i in safe_range(self.field('mask') + 1): 991 setentry = table[i] 992 key = setentry['key'] 993 if key != 0 and key != dummy_ptr: 994 yield PyObjectPtr.from_pyobject_ptr(key) 995 996 def proxyval(self, visited): 997 # Guard against infinite loops: 998 if self.as_address() in visited: 999 return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name()) 1000 visited.add(self.as_address()) 1001 1002 members = (key.proxyval(visited) for key in self) 1003 if self.safe_tp_name() == 'frozenset': 1004 return frozenset(members) 1005 else: 1006 return set(members) 1007 1008 def write_repr(self, out, visited): 1009 # Emulate Python 3's set_repr 1010 tp_name = self.safe_tp_name() 1011 1012 # Guard against infinite loops: 1013 if self.as_address() in visited: 1014 out.write('(...)') 1015 return 1016 visited.add(self.as_address()) 1017 1018 # Python 3's set_repr special-cases the empty set: 1019 if not self.field('used'): 1020 out.write(tp_name) 1021 out.write('()') 1022 return 1023 1024 # Python 3 uses {} for set literals: 1025 if tp_name != 'set': 1026 out.write(tp_name) 1027 out.write('(') 1028 1029 out.write('{') 1030 first = True 1031 for key in self: 1032 if not first: 1033 out.write(', ') 1034 first = False 1035 key.write_repr(out, visited) 1036 out.write('}') 1037 1038 if tp_name != 'set': 1039 out.write(')') 1040 1041 1042class PyBytesObjectPtr(PyObjectPtr): 1043 _typename = 'PyBytesObject' 1044 1045 def __str__(self): 1046 field_ob_size = self.field('ob_size') 1047 field_ob_sval = self.field('ob_sval') 1048 char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr()) 1049 return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)]) 1050 1051 def proxyval(self, visited): 1052 return str(self) 1053 1054 def write_repr(self, out, visited): 1055 # Write this out as a Python 3 bytes literal, i.e. with a "b" prefix 1056 1057 # Get a PyStringObject* within the Python 2 gdb process: 1058 proxy = self.proxyval(visited) 1059 1060 # Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr 1061 # to Python 2 code: 1062 quote = "'" 1063 if "'" in proxy and not '"' in proxy: 1064 quote = '"' 1065 out.write('b') 1066 out.write(quote) 1067 for byte in proxy: 1068 if byte == quote or byte == '\\': 1069 out.write('\\') 1070 out.write(byte) 1071 elif byte == '\t': 1072 out.write('\\t') 1073 elif byte == '\n': 1074 out.write('\\n') 1075 elif byte == '\r': 1076 out.write('\\r') 1077 elif byte < ' ' or ord(byte) >= 0x7f: 1078 out.write('\\x') 1079 out.write(hexdigits[(ord(byte) & 0xf0) >> 4]) 1080 out.write(hexdigits[ord(byte) & 0xf]) 1081 else: 1082 out.write(byte) 1083 out.write(quote) 1084 1085class PyTupleObjectPtr(PyObjectPtr): 1086 _typename = 'PyTupleObject' 1087 1088 def __getitem__(self, i): 1089 # Get the gdb.Value for the (PyObject*) with the given index: 1090 field_ob_item = self.field('ob_item') 1091 return field_ob_item[i] 1092 1093 def proxyval(self, visited): 1094 # Guard against infinite loops: 1095 if self.as_address() in visited: 1096 return ProxyAlreadyVisited('(...)') 1097 visited.add(self.as_address()) 1098 1099 result = tuple([PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited) 1100 for i in safe_range(int_from_int(self.field('ob_size')))]) 1101 return result 1102 1103 def write_repr(self, out, visited): 1104 # Guard against infinite loops: 1105 if self.as_address() in visited: 1106 out.write('(...)') 1107 return 1108 visited.add(self.as_address()) 1109 1110 out.write('(') 1111 for i in safe_range(int_from_int(self.field('ob_size'))): 1112 if i > 0: 1113 out.write(', ') 1114 element = PyObjectPtr.from_pyobject_ptr(self[i]) 1115 element.write_repr(out, visited) 1116 if self.field('ob_size') == 1: 1117 out.write(',)') 1118 else: 1119 out.write(')') 1120 1121class PyTypeObjectPtr(PyObjectPtr): 1122 _typename = 'PyTypeObject' 1123 1124 1125def _unichr_is_printable(char): 1126 # Logic adapted from Python 3's Tools/unicode/makeunicodedata.py 1127 if char == u" ": 1128 return True 1129 import unicodedata 1130 return unicodedata.category(char) not in ("C", "Z") 1131 1132if sys.maxunicode >= 0x10000: 1133 _unichr = unichr 1134else: 1135 # Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb 1136 def _unichr(x): 1137 if x < 0x10000: 1138 return unichr(x) 1139 x -= 0x10000 1140 ch1 = 0xD800 | (x >> 10) 1141 ch2 = 0xDC00 | (x & 0x3FF) 1142 return unichr(ch1) + unichr(ch2) 1143 1144 1145class PyUnicodeObjectPtr(PyObjectPtr): 1146 _typename = 'PyUnicodeObject' 1147 1148 def char_width(self): 1149 _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE') 1150 return _type_Py_UNICODE.sizeof 1151 1152 def proxyval(self, visited): 1153 global _is_pep393 1154 if _is_pep393 is None: 1155 fields = gdb.lookup_type('PyUnicodeObject').target().fields() 1156 _is_pep393 = 'data' in [f.name for f in fields] 1157 if _is_pep393: 1158 # Python 3.3 and newer 1159 may_have_surrogates = False 1160 compact = self.field('_base') 1161 ascii = compact['_base'] 1162 state = ascii['state'] 1163 is_compact_ascii = (int(state['ascii']) and int(state['compact'])) 1164 if not int(state['ready']): 1165 # string is not ready 1166 field_length = long(compact['wstr_length']) 1167 may_have_surrogates = True 1168 field_str = ascii['wstr'] 1169 else: 1170 field_length = long(ascii['length']) 1171 if is_compact_ascii: 1172 field_str = ascii.address + 1 1173 elif int(state['compact']): 1174 field_str = compact.address + 1 1175 else: 1176 field_str = self.field('data')['any'] 1177 repr_kind = int(state['kind']) 1178 if repr_kind == 1: 1179 field_str = field_str.cast(_type_unsigned_char_ptr()) 1180 elif repr_kind == 2: 1181 field_str = field_str.cast(_type_unsigned_short_ptr()) 1182 elif repr_kind == 4: 1183 field_str = field_str.cast(_type_unsigned_int_ptr()) 1184 else: 1185 # Python 3.2 and earlier 1186 field_length = long(self.field('length')) 1187 field_str = self.field('str') 1188 may_have_surrogates = self.char_width() == 2 1189 1190 # Gather a list of ints from the Py_UNICODE array; these are either 1191 # UCS-1, UCS-2 or UCS-4 code points: 1192 if not may_have_surrogates: 1193 Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] 1194 else: 1195 # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the 1196 # inferior process: we must join surrogate pairs. 1197 Py_UNICODEs = [] 1198 i = 0 1199 limit = safety_limit(field_length) 1200 while i < limit: 1201 ucs = int(field_str[i]) 1202 i += 1 1203 if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length: 1204 Py_UNICODEs.append(ucs) 1205 continue 1206 # This could be a surrogate pair. 1207 ucs2 = int(field_str[i]) 1208 if ucs2 < 0xDC00 or ucs2 > 0xDFFF: 1209 continue 1210 code = (ucs & 0x03FF) << 10 1211 code |= ucs2 & 0x03FF 1212 code += 0x00010000 1213 Py_UNICODEs.append(code) 1214 i += 1 1215 1216 # Convert the int code points to unicode characters, and generate a 1217 # local unicode instance. 1218 # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb). 1219 result = u''.join([ 1220 (_unichr(ucs) if ucs <= 0x10ffff else '\ufffd') 1221 for ucs in Py_UNICODEs]) 1222 return result 1223 1224 def write_repr(self, out, visited): 1225 # Write this out as a Python 3 str literal, i.e. without a "u" prefix 1226 1227 # Get a PyUnicodeObject* within the Python 2 gdb process: 1228 proxy = self.proxyval(visited) 1229 1230 # Transliteration of Python 3's Object/unicodeobject.c:unicode_repr 1231 # to Python 2: 1232 if "'" in proxy and '"' not in proxy: 1233 quote = '"' 1234 else: 1235 quote = "'" 1236 out.write(quote) 1237 1238 i = 0 1239 while i < len(proxy): 1240 ch = proxy[i] 1241 i += 1 1242 1243 # Escape quotes and backslashes 1244 if ch == quote or ch == '\\': 1245 out.write('\\') 1246 out.write(ch) 1247 1248 # Map special whitespace to '\t', \n', '\r' 1249 elif ch == '\t': 1250 out.write('\\t') 1251 elif ch == '\n': 1252 out.write('\\n') 1253 elif ch == '\r': 1254 out.write('\\r') 1255 1256 # Map non-printable US ASCII to '\xhh' */ 1257 elif ch < ' ' or ch == 0x7F: 1258 out.write('\\x') 1259 out.write(hexdigits[(ord(ch) >> 4) & 0x000F]) 1260 out.write(hexdigits[ord(ch) & 0x000F]) 1261 1262 # Copy ASCII characters as-is 1263 elif ord(ch) < 0x7F: 1264 out.write(ch) 1265 1266 # Non-ASCII characters 1267 else: 1268 ucs = ch 1269 ch2 = None 1270 if sys.maxunicode < 0x10000: 1271 # If sizeof(Py_UNICODE) is 2 here (in gdb), join 1272 # surrogate pairs before calling _unichr_is_printable. 1273 if (i < len(proxy) 1274 and 0xD800 <= ord(ch) < 0xDC00 \ 1275 and 0xDC00 <= ord(proxy[i]) <= 0xDFFF): 1276 ch2 = proxy[i] 1277 ucs = ch + ch2 1278 i += 1 1279 1280 # Unfortuately, Python 2's unicode type doesn't seem 1281 # to expose the "isprintable" method 1282 printable = _unichr_is_printable(ucs) 1283 if printable: 1284 try: 1285 ucs.encode(ENCODING) 1286 except UnicodeEncodeError: 1287 printable = False 1288 1289 # Map Unicode whitespace and control characters 1290 # (categories Z* and C* except ASCII space) 1291 if not printable: 1292 if ch2 is not None: 1293 # Match Python 3's representation of non-printable 1294 # wide characters. 1295 code = (ord(ch) & 0x03FF) << 10 1296 code |= ord(ch2) & 0x03FF 1297 code += 0x00010000 1298 else: 1299 code = ord(ucs) 1300 1301 # Map 8-bit characters to '\\xhh' 1302 if code <= 0xff: 1303 out.write('\\x') 1304 out.write(hexdigits[(code >> 4) & 0x000F]) 1305 out.write(hexdigits[code & 0x000F]) 1306 # Map 21-bit characters to '\U00xxxxxx' 1307 elif code >= 0x10000: 1308 out.write('\\U') 1309 out.write(hexdigits[(code >> 28) & 0x0000000F]) 1310 out.write(hexdigits[(code >> 24) & 0x0000000F]) 1311 out.write(hexdigits[(code >> 20) & 0x0000000F]) 1312 out.write(hexdigits[(code >> 16) & 0x0000000F]) 1313 out.write(hexdigits[(code >> 12) & 0x0000000F]) 1314 out.write(hexdigits[(code >> 8) & 0x0000000F]) 1315 out.write(hexdigits[(code >> 4) & 0x0000000F]) 1316 out.write(hexdigits[code & 0x0000000F]) 1317 # Map 16-bit characters to '\uxxxx' 1318 else: 1319 out.write('\\u') 1320 out.write(hexdigits[(code >> 12) & 0x000F]) 1321 out.write(hexdigits[(code >> 8) & 0x000F]) 1322 out.write(hexdigits[(code >> 4) & 0x000F]) 1323 out.write(hexdigits[code & 0x000F]) 1324 else: 1325 # Copy characters as-is 1326 out.write(ch) 1327 if ch2 is not None: 1328 out.write(ch2) 1329 1330 out.write(quote) 1331 1332 1333 1334 1335def int_from_int(gdbval): 1336 return int(str(gdbval)) 1337 1338 1339def stringify(val): 1340 # TODO: repr() puts everything on one line; pformat can be nicer, but 1341 # can lead to v.long results; this function isolates the choice 1342 if True: 1343 return repr(val) 1344 else: 1345 from pprint import pformat 1346 return pformat(val) 1347 1348 1349class PyObjectPtrPrinter: 1350 "Prints a (PyObject*)" 1351 1352 def __init__ (self, gdbval): 1353 self.gdbval = gdbval 1354 1355 def to_string (self): 1356 pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval) 1357 if True: 1358 return pyop.get_truncated_repr(MAX_OUTPUT_LEN) 1359 else: 1360 # Generate full proxy value then stringify it. 1361 # Doing so could be expensive 1362 proxyval = pyop.proxyval(set()) 1363 return stringify(proxyval) 1364 1365def pretty_printer_lookup(gdbval): 1366 type = gdbval.type.unqualified() 1367 if type.code == gdb.TYPE_CODE_PTR: 1368 type = type.target().unqualified() 1369 t = str(type) 1370 if t in ("PyObject", "PyFrameObject", "PyUnicodeObject"): 1371 return PyObjectPtrPrinter(gdbval) 1372 1373""" 1374During development, I've been manually invoking the code in this way: 1375(gdb) python 1376 1377import sys 1378sys.path.append('/home/david/coding/python-gdb') 1379import libpython 1380end 1381 1382then reloading it after each edit like this: 1383(gdb) python reload(libpython) 1384 1385The following code should ensure that the prettyprinter is registered 1386if the code is autoloaded by gdb when visiting libpython.so, provided 1387that this python file is installed to the same path as the library (or its 1388.debug file) plus a "-gdb.py" suffix, e.g: 1389 /usr/lib/libpython2.6.so.1.0-gdb.py 1390 /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py 1391""" 1392def register (obj): 1393 if obj is None: 1394 obj = gdb 1395 1396 # Wire up the pretty-printer 1397 obj.pretty_printers.append(pretty_printer_lookup) 1398 1399register (gdb.current_objfile ()) 1400 1401 1402 1403# Unfortunately, the exact API exposed by the gdb module varies somewhat 1404# from build to build 1405# See http://bugs.python.org/issue8279?#msg102276 1406 1407class Frame(object): 1408 ''' 1409 Wrapper for gdb.Frame, adding various methods 1410 ''' 1411 def __init__(self, gdbframe): 1412 self._gdbframe = gdbframe 1413 1414 def older(self): 1415 older = self._gdbframe.older() 1416 if older: 1417 return Frame(older) 1418 else: 1419 return None 1420 1421 def newer(self): 1422 newer = self._gdbframe.newer() 1423 if newer: 1424 return Frame(newer) 1425 else: 1426 return None 1427 1428 def select(self): 1429 '''If supported, select this frame and return True; return False if unsupported 1430 1431 Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12 1432 onwards, but absent on Ubuntu buildbot''' 1433 if not hasattr(self._gdbframe, 'select'): 1434 print ('Unable to select frame: ' 1435 'this build of gdb does not expose a gdb.Frame.select method') 1436 return False 1437 self._gdbframe.select() 1438 return True 1439 1440 def get_index(self): 1441 '''Calculate index of frame, starting at 0 for the newest frame within 1442 this thread''' 1443 index = 0 1444 # Go down until you reach the newest frame: 1445 iter_frame = self 1446 while iter_frame.newer(): 1447 index += 1 1448 iter_frame = iter_frame.newer() 1449 return index 1450 1451 # We divide frames into: 1452 # - "python frames": 1453 # - "bytecode frames" i.e. PyEval_EvalFrameEx 1454 # - "other python frames": things that are of interest from a python 1455 # POV, but aren't bytecode (e.g. GC, GIL) 1456 # - everything else 1457 1458 def is_python_frame(self): 1459 '''Is this a PyEval_EvalFrameEx frame, or some other important 1460 frame? (see is_other_python_frame for what "important" means in this 1461 context)''' 1462 if self.is_evalframeex(): 1463 return True 1464 if self.is_other_python_frame(): 1465 return True 1466 return False 1467 1468 def is_evalframeex(self): 1469 '''Is this a PyEval_EvalFrameEx frame?''' 1470 if self._gdbframe.name() == 'PyEval_EvalFrameEx': 1471 ''' 1472 I believe we also need to filter on the inline 1473 struct frame_id.inline_depth, only regarding frames with 1474 an inline depth of 0 as actually being this function 1475 1476 So we reject those with type gdb.INLINE_FRAME 1477 ''' 1478 if self._gdbframe.type() == gdb.NORMAL_FRAME: 1479 # We have a PyEval_EvalFrameEx frame: 1480 return True 1481 1482 return False 1483 1484 def is_other_python_frame(self): 1485 '''Is this frame worth displaying in python backtraces? 1486 Examples: 1487 - waiting on the GIL 1488 - garbage-collecting 1489 - within a CFunction 1490 If it is, return a descriptive string 1491 For other frames, return False 1492 ''' 1493 if self.is_waiting_for_gil(): 1494 return 'Waiting for the GIL' 1495 1496 if self.is_gc_collect(): 1497 return 'Garbage-collecting' 1498 1499 # Detect invocations of PyCFunction instances: 1500 older = self.older() 1501 if not older: 1502 return False 1503 1504 caller = older._gdbframe.name() 1505 if not caller: 1506 return False 1507 1508 if caller == 'PyCFunction_Call': 1509 # Within that frame: 1510 # "func" is the local containing the PyObject* of the 1511 # PyCFunctionObject instance 1512 # "f" is the same value, but cast to (PyCFunctionObject*) 1513 # "self" is the (PyObject*) of the 'self' 1514 try: 1515 # Use the prettyprinter for the func: 1516 func = older._gdbframe.read_var('func') 1517 return str(func) 1518 except RuntimeError: 1519 return 'PyCFunction invocation (unable to read "func")' 1520 1521 elif caller == '_PyCFunction_FastCallDict': 1522 try: 1523 func = older._gdbframe.read_var('func_obj') 1524 return str(func) 1525 except RuntimeError: 1526 return 'PyCFunction invocation (unable to read "func_obj")' 1527 1528 # This frame isn't worth reporting: 1529 return False 1530 1531 def is_waiting_for_gil(self): 1532 '''Is this frame waiting on the GIL?''' 1533 # This assumes the _POSIX_THREADS version of Python/ceval_gil.h: 1534 name = self._gdbframe.name() 1535 if name: 1536 return 'pthread_cond_timedwait' in name 1537 1538 def is_gc_collect(self): 1539 '''Is this frame "collect" within the garbage-collector?''' 1540 return self._gdbframe.name() == 'collect' 1541 1542 def get_pyop(self): 1543 try: 1544 f = self._gdbframe.read_var('f') 1545 frame = PyFrameObjectPtr.from_pyobject_ptr(f) 1546 if not frame.is_optimized_out(): 1547 return frame 1548 # gdb is unable to get the "f" argument of PyEval_EvalFrameEx() 1549 # because it was "optimized out". Try to get "f" from the frame 1550 # of the caller, PyEval_EvalCodeEx(). 1551 orig_frame = frame 1552 caller = self._gdbframe.older() 1553 if caller: 1554 f = caller.read_var('f') 1555 frame = PyFrameObjectPtr.from_pyobject_ptr(f) 1556 if not frame.is_optimized_out(): 1557 return frame 1558 return orig_frame 1559 except ValueError: 1560 return None 1561 1562 @classmethod 1563 def get_selected_frame(cls): 1564 _gdbframe = gdb.selected_frame() 1565 if _gdbframe: 1566 return Frame(_gdbframe) 1567 return None 1568 1569 @classmethod 1570 def get_selected_python_frame(cls): 1571 '''Try to obtain the Frame for the python-related code in the selected 1572 frame, or None''' 1573 try: 1574 frame = cls.get_selected_frame() 1575 except gdb.error: 1576 # No frame: Python didn't start yet 1577 return None 1578 1579 while frame: 1580 if frame.is_python_frame(): 1581 return frame 1582 frame = frame.older() 1583 1584 # Not found: 1585 return None 1586 1587 @classmethod 1588 def get_selected_bytecode_frame(cls): 1589 '''Try to obtain the Frame for the python bytecode interpreter in the 1590 selected GDB frame, or None''' 1591 frame = cls.get_selected_frame() 1592 1593 while frame: 1594 if frame.is_evalframeex(): 1595 return frame 1596 frame = frame.older() 1597 1598 # Not found: 1599 return None 1600 1601 def print_summary(self): 1602 if self.is_evalframeex(): 1603 pyop = self.get_pyop() 1604 if pyop: 1605 line = pyop.get_truncated_repr(MAX_OUTPUT_LEN) 1606 write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line)) 1607 if not pyop.is_optimized_out(): 1608 line = pyop.current_line() 1609 if line is not None: 1610 sys.stdout.write(' %s\n' % line.strip()) 1611 else: 1612 sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index()) 1613 else: 1614 info = self.is_other_python_frame() 1615 if info: 1616 sys.stdout.write('#%i %s\n' % (self.get_index(), info)) 1617 else: 1618 sys.stdout.write('#%i\n' % self.get_index()) 1619 1620 def print_traceback(self): 1621 if self.is_evalframeex(): 1622 pyop = self.get_pyop() 1623 if pyop: 1624 pyop.print_traceback() 1625 if not pyop.is_optimized_out(): 1626 line = pyop.current_line() 1627 if line is not None: 1628 sys.stdout.write(' %s\n' % line.strip()) 1629 else: 1630 sys.stdout.write(' (unable to read python frame information)\n') 1631 else: 1632 info = self.is_other_python_frame() 1633 if info: 1634 sys.stdout.write(' %s\n' % info) 1635 else: 1636 sys.stdout.write(' (not a python frame)\n') 1637 1638class PyList(gdb.Command): 1639 '''List the current Python source code, if any 1640 1641 Use 1642 py-list START 1643 to list at a different line number within the python source. 1644 1645 Use 1646 py-list START, END 1647 to list a specific range of lines within the python source. 1648 ''' 1649 1650 def __init__(self): 1651 gdb.Command.__init__ (self, 1652 "py-list", 1653 gdb.COMMAND_FILES, 1654 gdb.COMPLETE_NONE) 1655 1656 1657 def invoke(self, args, from_tty): 1658 import re 1659 1660 start = None 1661 end = None 1662 1663 m = re.match(r'\s*(\d+)\s*', args) 1664 if m: 1665 start = int(m.group(0)) 1666 end = start + 10 1667 1668 m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args) 1669 if m: 1670 start, end = map(int, m.groups()) 1671 1672 # py-list requires an actual PyEval_EvalFrameEx frame: 1673 frame = Frame.get_selected_bytecode_frame() 1674 if not frame: 1675 print('Unable to locate gdb frame for python bytecode interpreter') 1676 return 1677 1678 pyop = frame.get_pyop() 1679 if not pyop or pyop.is_optimized_out(): 1680 print('Unable to read information on python frame') 1681 return 1682 1683 filename = pyop.filename() 1684 lineno = pyop.current_line_num() 1685 1686 if start is None: 1687 start = lineno - 5 1688 end = lineno + 5 1689 1690 if start<1: 1691 start = 1 1692 1693 try: 1694 f = open(os_fsencode(filename), 'r') 1695 except IOError as err: 1696 sys.stdout.write('Unable to open %s: %s\n' 1697 % (filename, err)) 1698 return 1699 with f: 1700 all_lines = f.readlines() 1701 # start and end are 1-based, all_lines is 0-based; 1702 # so [start-1:end] as a python slice gives us [start, end] as a 1703 # closed interval 1704 for i, line in enumerate(all_lines[start-1:end]): 1705 linestr = str(i+start) 1706 # Highlight current line: 1707 if i + start == lineno: 1708 linestr = '>' + linestr 1709 sys.stdout.write('%4s %s' % (linestr, line)) 1710 1711 1712# ...and register the command: 1713PyList() 1714 1715def move_in_stack(move_up): 1716 '''Move up or down the stack (for the py-up/py-down command)''' 1717 frame = Frame.get_selected_python_frame() 1718 if not frame: 1719 print('Unable to locate python frame') 1720 return 1721 1722 while frame: 1723 if move_up: 1724 iter_frame = frame.older() 1725 else: 1726 iter_frame = frame.newer() 1727 1728 if not iter_frame: 1729 break 1730 1731 if iter_frame.is_python_frame(): 1732 # Result: 1733 if iter_frame.select(): 1734 iter_frame.print_summary() 1735 return 1736 1737 frame = iter_frame 1738 1739 if move_up: 1740 print('Unable to find an older python frame') 1741 else: 1742 print('Unable to find a newer python frame') 1743 1744class PyUp(gdb.Command): 1745 'Select and print the python stack frame that called this one (if any)' 1746 def __init__(self): 1747 gdb.Command.__init__ (self, 1748 "py-up", 1749 gdb.COMMAND_STACK, 1750 gdb.COMPLETE_NONE) 1751 1752 1753 def invoke(self, args, from_tty): 1754 move_in_stack(move_up=True) 1755 1756class PyDown(gdb.Command): 1757 'Select and print the python stack frame called by this one (if any)' 1758 def __init__(self): 1759 gdb.Command.__init__ (self, 1760 "py-down", 1761 gdb.COMMAND_STACK, 1762 gdb.COMPLETE_NONE) 1763 1764 1765 def invoke(self, args, from_tty): 1766 move_in_stack(move_up=False) 1767 1768# Not all builds of gdb have gdb.Frame.select 1769if hasattr(gdb.Frame, 'select'): 1770 PyUp() 1771 PyDown() 1772 1773class PyBacktraceFull(gdb.Command): 1774 'Display the current python frame and all the frames within its call stack (if any)' 1775 def __init__(self): 1776 gdb.Command.__init__ (self, 1777 "py-bt-full", 1778 gdb.COMMAND_STACK, 1779 gdb.COMPLETE_NONE) 1780 1781 1782 def invoke(self, args, from_tty): 1783 frame = Frame.get_selected_python_frame() 1784 if not frame: 1785 print('Unable to locate python frame') 1786 return 1787 1788 while frame: 1789 if frame.is_python_frame(): 1790 frame.print_summary() 1791 frame = frame.older() 1792 1793PyBacktraceFull() 1794 1795class PyBacktrace(gdb.Command): 1796 'Display the current python frame and all the frames within its call stack (if any)' 1797 def __init__(self): 1798 gdb.Command.__init__ (self, 1799 "py-bt", 1800 gdb.COMMAND_STACK, 1801 gdb.COMPLETE_NONE) 1802 1803 1804 def invoke(self, args, from_tty): 1805 frame = Frame.get_selected_python_frame() 1806 if not frame: 1807 print('Unable to locate python frame') 1808 return 1809 1810 sys.stdout.write('Traceback (most recent call first):\n') 1811 while frame: 1812 if frame.is_python_frame(): 1813 frame.print_traceback() 1814 frame = frame.older() 1815 1816PyBacktrace() 1817 1818class PyPrint(gdb.Command): 1819 'Look up the given python variable name, and print it' 1820 def __init__(self): 1821 gdb.Command.__init__ (self, 1822 "py-print", 1823 gdb.COMMAND_DATA, 1824 gdb.COMPLETE_NONE) 1825 1826 1827 def invoke(self, args, from_tty): 1828 name = str(args) 1829 1830 frame = Frame.get_selected_python_frame() 1831 if not frame: 1832 print('Unable to locate python frame') 1833 return 1834 1835 pyop_frame = frame.get_pyop() 1836 if not pyop_frame: 1837 print('Unable to read information on python frame') 1838 return 1839 1840 pyop_var, scope = pyop_frame.get_var_by_name(name) 1841 1842 if pyop_var: 1843 print('%s %r = %s' 1844 % (scope, 1845 name, 1846 pyop_var.get_truncated_repr(MAX_OUTPUT_LEN))) 1847 else: 1848 print('%r not found' % name) 1849 1850PyPrint() 1851 1852class PyLocals(gdb.Command): 1853 'Look up the given python variable name, and print it' 1854 def __init__(self): 1855 gdb.Command.__init__ (self, 1856 "py-locals", 1857 gdb.COMMAND_DATA, 1858 gdb.COMPLETE_NONE) 1859 1860 1861 def invoke(self, args, from_tty): 1862 name = str(args) 1863 1864 frame = Frame.get_selected_python_frame() 1865 if not frame: 1866 print('Unable to locate python frame') 1867 return 1868 1869 pyop_frame = frame.get_pyop() 1870 if not pyop_frame: 1871 print('Unable to read information on python frame') 1872 return 1873 1874 for pyop_name, pyop_value in pyop_frame.iter_locals(): 1875 print('%s = %s' 1876 % (pyop_name.proxyval(set()), 1877 pyop_value.get_truncated_repr(MAX_OUTPUT_LEN))) 1878 1879PyLocals() 1880